Take all the reverse-mapping data we've acquired and use it to generate reference count data. This data is used in phase 5 to rebuild the reflink btree. Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- repair/phase4.c | 65 +++++++++ repair/rmap.c | 414 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ repair/rmap.h | 4 + 3 files changed, 481 insertions(+), 2 deletions(-) diff --git a/repair/phase4.c b/repair/phase4.c index 2c2cccb..64627a5 100644 --- a/repair/phase4.c +++ b/repair/phase4.c @@ -30,6 +30,8 @@ #include "versions.h" #include "dir2.h" #include "progress.h" +#include "slab.h" +#include "rmap.h" bool collect_rmaps = false; @@ -154,6 +156,61 @@ process_ags( do_inode_prefetch(mp, ag_stride, process_ag_func, true, false); } +static void +process_ag_rmaps( + work_queue_t *wq, + xfs_agnumber_t agno, + void *arg) +{ + int error; + + do_log(_(" - agno = %d\n"), agno); + error = rebuild_ag_rlrmap_records(wq->mp, agno); + if (error) + do_error( +_("%s while processing reverse-mapping records.\n"), + strerror(-error)); +} + +static void +process_inode_reflink_flags( + work_queue_t *wq, + xfs_agnumber_t agno, + void *arg) +{ + int error; + + error = reflink_fix_inode_flags(wq->mp, agno); + if (error) + do_error( +_("%s while fixing inode reflink flags.\n"), + strerror(-error)); +} + +static void +process_rmaps( + xfs_mount_t *mp) +{ + struct work_queue wq; + xfs_agnumber_t i; + + if (!needs_rmap_work(mp)) + return; + + do_log(_(" - processing reverse mapping data...\n")); + create_work_queue(&wq, mp, libxfs_nproc()); + for (i = 0; i < mp->m_sb.sb_agcount; i++) + queue_work(&wq, process_ag_rmaps, i, NULL); + destroy_work_queue(&wq); + + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return; + + create_work_queue(&wq, mp, libxfs_nproc()); + for (i = 0; i < mp->m_sb.sb_agcount; i++) + queue_work(&wq, process_inode_reflink_flags, i, NULL); + destroy_work_queue(&wq); +} void phase4(xfs_mount_t *mp) @@ -302,6 +359,14 @@ phase4(xfs_mount_t *mp) * already in 
phase 3. */ process_ags(mp); + + + /* + * Rebuild the reverse mapping and reflink records based on the + * mappings we observed. + */ + process_rmaps(mp); + print_final_rpt(); /* diff --git a/repair/rmap.c b/repair/rmap.c index 2e1829c..cc34570 100644 --- a/repair/rmap.c +++ b/repair/rmap.c @@ -40,7 +40,6 @@ typedef struct xfs_rmap { xfs_fileoff_t rm_startoff; /* starting file offset */ xfs_agblock_t rm_startblock; /* starting AG block number */ xfs_extlen_t rm_blockcount; /* number of AG blocks */ - struct xfs_rmap *rm_next; /* next item in stack */ } xfs_rmap_t; /* per-AG rmap object anchor */ @@ -150,7 +149,6 @@ add_rmap( rmap.rm_startoff = irec->br_startoff; rmap.rm_startblock = agbno; rmap.rm_blockcount = irec->br_blockcount; - rmap.rm_next = NULL; return slab_add(rmaps, &rmap); } @@ -174,6 +172,312 @@ dump_rmap( #endif /** + * rmap_compare() -- Order rmaps by startblock, then inode, then file offset. + */ +static int +rmap_compare( + const void *a, + const void *b) +{ + const xfs_rmap_t *pa; + const xfs_rmap_t *pb; + + pa = a; pb = b; + if (pa->rm_startblock < pb->rm_startblock) + return -1; + else if (pa->rm_startblock > pb->rm_startblock) + return 1; + else if (pa->rm_ino < pb->rm_ino) + return -1; + else if (pa->rm_ino > pb->rm_ino) + return 1; + else if (pa->rm_startoff < pb->rm_startoff) + return -1; + else if (pa->rm_startoff > pb->rm_startoff) + return 1; + else + return 0; +} + +/** + * rmap_sb_compare() -- Compare function for rmap observations so that they + * come out in pblk order. + */ +static int +rmap_sb_compare( + const void *a, + const void *b) +{ + const xfs_rmap_t *pa; + const xfs_rmap_t *pb; + + pa = a; pb = b; + if (pa->rm_startblock < pb->rm_startblock) + return -1; + else if (pa->rm_startblock > pb->rm_startblock) + return 1; + else + return 0; +} + +/** + * mark_inode_rl() -- Mark all inodes in the reverse-mapping observation stack + * as requiring the reflink inode flag, if the stack depth + * is greater than 1. + * + * @mp: XFS mount object. 
+ * @rmaps: Head of the stack of rmap observations. + * @nr_rmaps: Depth of the stack. + */ +static void +mark_inode_rl( + xfs_mount_t *mp, + xfs_bag_t *rmaps) +{ + xfs_agnumber_t iagno; + xfs_rmap_t *rmap; + ino_tree_node_t *irec; + int off; + size_t idx; + xfs_agino_t ino; + + if (bag_count(rmaps) < 2) + return; + + /* Reflink flag accounting */ + foreach_bag_ptr(rmaps, idx, rmap) { + iagno = XFS_INO_TO_AGNO(mp, rmap->rm_ino); + ino = XFS_INO_TO_AGINO(mp, rmap->rm_ino); + pthread_mutex_lock(&ag_locks[iagno].lock); + irec = find_inode_rec(mp, iagno, ino); + off = get_inode_offset(mp, rmap->rm_ino, irec); + /* lock here because we might go outside this ag */ + set_inode_is_rl(irec, off); + pthread_mutex_unlock(&ag_locks[iagno].lock); + } +} + +/** + * rmap_emit() -- Emit reverse-mapping objects for rmapbt reconstruction + * during phase 5. + * + * @mp: XFS mount object. + * @agno: The AG number. + * @agbno: AG block number of the reverse mapping extent. + * @len: Length of the extent. + * @rmaps: Stack of reverse-mapping observations. + * @nr_rmaps: Depth of the stack. + */ +static void +rmap_emit( + xfs_mount_t *mp, + xfs_agnumber_t agno, + xfs_agblock_t agbno, + xfs_extlen_t len, + xfs_bag_t *rmaps) +{ + xfs_rmap_t *rmap; + size_t n; + + ASSERT(bag_count(rmaps) > 0); + + foreach_bag_ptr(rmaps, n, rmap) { + ASSERT(rmap->rm_blockcount >= len); + ASSERT(rmap->rm_startblock <= agbno); + dbg_printf("RMAP(%zu): agno=%lu pblk=%llu, len=%lu -> ino=%llu, lblk=%llu\n", + n, (unsigned long)agno, (unsigned long long)agbno, + (unsigned long)len, (unsigned long long)rmap->rm_ino, + (unsigned long long)rmap->rm_startoff); + } +} + +/** + * refcount_emit() -- Emit a reflink object for rlbt reconstruction + * during phase 5. + * + * @mp: XFS mount object. + * @agno: The AG number. + * @agbno: AG block number of the reverse mapping extent. + * @len: Length of the extent. + * @rmaps: Stack of reverse-mapping observations. + * @nr_rmaps: Depth of the stack. 
+ * @is_rmap: True if reverse-mapping is enabled. + * @is_reflink: True if reflinking is enabled. + */ +#define REFCOUNT_CLAMP(nr) ((nr) > MAXRLCOUNT ? MAXRLCOUNT : (nr)) +static void +refcount_emit( + xfs_mount_t *mp, + xfs_agnumber_t agno, + xfs_agblock_t agbno, + xfs_extlen_t len, + size_t nr_rmaps) +{ + xfs_reflink_rec_incore_t rlrec; + int error; + xfs_slab_t *rlslab; + + rlslab = ag_rmaps[agno].ar_reflink_items; + ASSERT(nr_rmaps > 0); + + dbg_printf("REFL: agno=%u pblk=%u, len=%u -> refcount=%zu\n", + agno, agbno, len, nr_rmaps); + rlrec.rr_startblock = agbno; + rlrec.rr_blockcount = len; + rlrec.rr_nlinks = REFCOUNT_CLAMP(nr_rmaps); + error = slab_add(rlslab, &rlrec); + if (error) + do_error( +_("Insufficient memory while recreating reflink tree.")); +} +#undef REFCOUNT_CLAMP + +/** + * rebuild_ag_rlrmap_records() - transform a pile of physical block mapping + * observations into reflink and rmap data for + * eventual rebuilding of the btrees. + * + * XXX: Should the stack be sorted in order of last pblk? + * @mp: XFS mount object. + * @agno: AG number. 
+ */ +#define RMAP_END(r) ((r)->rm_startblock + (r)->rm_blockcount) +int +rebuild_ag_rlrmap_records( + xfs_mount_t *mp, + xfs_agnumber_t agno) +{ + xfs_bag_t *stack_top = NULL; + xfs_slab_t *rmaps; + xfs_slab_cursor_t *rmaps_cur; + xfs_rmap_t *array_cur; + xfs_rmap_t *rmap; + xfs_agblock_t sbno; /* first bno of this rmap set */ + xfs_agblock_t cbno; /* first bno of this refcount set */ + xfs_agblock_t nbno; /* next bno where rmap set changes */ + size_t n, idx; + size_t old_stack_nr; + bool is_rmap; + bool is_reflink; + int error; + + is_reflink = xfs_sb_version_hasreflink(&mp->m_sb); + is_rmap = xfs_sb_version_hasrmapbt(&mp->m_sb); + if (!is_reflink && !is_rmap) + return 0; + + rmaps = ag_rmaps[agno].ar_rmaps; + qsort_slab(rmaps, rmap_compare); + + error = init_slab_cursor(rmaps, rmap_sb_compare, &rmaps_cur); + if (error) + return error; + + error = init_bag(&stack_top); + if (error) + goto err; + + /* While there are rmaps to be processed... */ + n = 0; + while (n < slab_count(rmaps)) { + array_cur = peek_slab_cursor(rmaps_cur); + sbno = cbno = array_cur->rm_startblock; + /* Push all rmaps with pblk == sbno onto the stack */ + for (; + array_cur && array_cur->rm_startblock == sbno; + array_cur = peek_slab_cursor(rmaps_cur)) { + advance_slab_cursor(rmaps_cur); n++; + dump_rmap("push0", agno, array_cur); + error = bag_add(stack_top, array_cur); + if (error) + goto err; + } + mark_inode_rl(mp, stack_top); + + /* Set nbno to the bno of the next refcount change */ + if (n < slab_count(rmaps)) + nbno = array_cur->rm_startblock; + else + nbno = NULLAGBLOCK; + foreach_bag_ptr(stack_top, idx, rmap) { + nbno = min(nbno, RMAP_END(rmap)); + } + + /* Emit reverse mappings, if needed */ + ASSERT(nbno > sbno); + if (is_rmap) { + rmap_emit(mp, agno, sbno, nbno - sbno, stack_top); + } + old_stack_nr = bag_count(stack_top); + + /* While stack isn't empty... 
*/ + while (bag_count(stack_top)) { + /* Pop all rmaps that end at nbno */ + foreach_bag_ptr_reverse(stack_top, idx, rmap) { + if (RMAP_END(rmap) != nbno) + continue; + dump_rmap("pop", agno, rmap); + error = bag_remove(stack_top, idx); + if (error) + goto err; + } + + /* Push array items that start at nbno */ + for (; + array_cur && array_cur->rm_startblock == nbno; + array_cur = peek_slab_cursor(rmaps_cur)) { + advance_slab_cursor(rmaps_cur); n++; + dump_rmap("push1", agno, array_cur); + error = bag_add(stack_top, array_cur); + if (error) + goto err; + } + mark_inode_rl(mp, stack_top); + + /* Emit refcount if necessary */ + ASSERT(nbno > cbno); + if (bag_count(stack_top) != old_stack_nr) { + if (is_reflink && old_stack_nr > 1) { + refcount_emit(mp, agno, cbno, + nbno - cbno, + old_stack_nr); + } + cbno = nbno; + } + + /* Stack empty, go find the next rmap */ + if (bag_count(stack_top) == 0) + break; + old_stack_nr = bag_count(stack_top); + sbno = nbno; + + /* Set nbno to the bno of the next refcount change */ + if (n < slab_count(rmaps)) + nbno = array_cur->rm_startblock; + else + nbno = NULLAGBLOCK; + foreach_bag_ptr(stack_top, idx, rmap) { + nbno = min(nbno, RMAP_END(rmap)); + } + + /* Emit reverse mappings, if needed */ + ASSERT(nbno > sbno); + if (is_rmap) { + rmap_emit(mp, agno, sbno, nbno - sbno, + stack_top); + } + } + } +err: + free_bag(&stack_top); + free_slab_cursor(&rmaps_cur); + free_slab(&ag_rmaps[agno].ar_rmaps); + + return error; +} +#undef RMAP_END + +/** * reflink_record_inode_flag() -- Record that an inode had the reflink flag * set when repair started. The inode reflink * flag will be adjusted as necessary. @@ -204,3 +508,109 @@ reflink_record_inode_flag( dbg_printf("set was_rl lino=%llu was=0x%llx\n", (unsigned long long)lino, (unsigned long long)irec->ino_was_rl); } + +/** + * set_rl() -- Fix an inode's reflink flag. + * + * @mp: XFS mount object. + * @agno: AG number. + * @agino: per-AG inode number. 
+ * @set: True if the flag must be set; False if it must be cleared. + */ +static int +set_rl( + xfs_mount_t *mp, + xfs_agnumber_t agno, + xfs_agino_t agino, + bool set) +{ + xfs_dinode_t *dino; + xfs_buf_t *buf; + + buf = get_agino_buf(mp, agno, agino, &dino); + if (!buf) + return 1; + ASSERT(XFS_AGINO_TO_INO(mp, agno, agino) == be64_to_cpu(dino->di_ino)); + + if (set) + do_warn( +_("setting reflink flag on inode %"PRIu64"\n"), + XFS_AGINO_TO_INO(mp, agno, agino)); + else if (!no_modify) /* && !set */ + do_warn( +_("clearing reflink flag on inode %"PRIu64"\n"), + XFS_AGINO_TO_INO(mp, agno, agino)); + if (no_modify) { + libxfs_putbuf(buf); + return 0; + } + if (set) + dino->di_flags |= cpu_to_be16(XFS_DIFLAG_REFLINK); + else + dino->di_flags &= cpu_to_be16(~XFS_DIFLAG_REFLINK); + libxfs_dinode_calc_crc(mp, dino); + libxfs_writebuf(buf, 0); + + return 0; +} + +/** + * reflink_fix_inode_flags() -- Fix discrepancies between the state of the + * inode reflink flag and our observations as to + * whether or not the inode really needs it. + * @mp: XFS mountpoint. + * @agno: AG number. + */ +int +reflink_fix_inode_flags( + xfs_mount_t *mp, + xfs_agnumber_t agno) +{ + ino_tree_node_t *irec; + int bit; + __uint64_t was; + __uint64_t is; + __uint64_t diff; + __uint64_t mask; + int error = 0; + xfs_agino_t agino; + + /* + * Update the reflink flag for any inode where there's a discrepancy + * between the inode flag and whether or not we found any reflinked + * extents. 
+ */ + for (irec = findfirst_inode_rec(agno); + irec != NULL; + irec = next_ino_rec(irec)) { + ASSERT((irec->ino_was_rl & irec->ir_free) == 0); + ASSERT((irec->ino_is_rl & irec->ir_free) == 0); + was = irec->ino_was_rl; + is = irec->ino_is_rl; + if (was == is) + continue; + diff = was ^ is; + dbg_printf("mismatch ino=%llu was=0x%"PRIx64" is=0x%"PRIx64" dif=0x%"PRIx64"\n", + (unsigned long long)XFS_AGINO_TO_INO(mp, agno, + irec->ino_startnum), + was, is, diff); + + for (bit = 0, mask = 1; bit < 64; bit++, mask <<= 1) { + agino = bit + irec->ino_startnum; + if (!(diff & mask)) + continue; + else if (was & mask) + error = set_rl(mp, agno, agino, false); + else if (is & mask) + error = set_rl(mp, agno, agino, true); + else + ASSERT(0); + if (error) + do_error( +_("Unable to fix reflink flag on inode %"PRIu64".\n"), + XFS_AGINO_TO_INO(mp, agno, agino)); + } + } + + return error; +} diff --git a/repair/rmap.h b/repair/rmap.h index 16ad157..7dc709f 100644 --- a/repair/rmap.h +++ b/repair/rmap.h @@ -31,4 +31,8 @@ extern void reflink_record_inode_flag(xfs_mount_t *mp, xfs_dinode_t *dino, extern bool needs_rmap_work(xfs_mount_t *mp); +extern int reflink_fix_inode_flags(xfs_mount_t *mp, xfs_agnumber_t agno); + +extern int rebuild_ag_rlrmap_records(xfs_mount_t *mp, xfs_agnumber_t agno); + #endif /* RMAP_H_ */ _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs