From: Darrick J. Wong <djwong@xxxxxxxxxx> Convert the rmap btree repair code to use in-memory rmap btrees to store the observed reverse mapping records. This will eliminate the need for a separate record sorting step, as well as eliminate the need for all the code that turns multiple consecutive bmap records into a single rmap record. Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> --- libxfs/libxfs_api_defs.h | 9 ++ repair/agbtree.c | 18 ++- repair/agbtree.h | 1 repair/phase5.c | 2 repair/rmap.c | 243 ++++++++++++++++++++++++++++++++++++++++++++-- repair/rmap.h | 16 +++ repair/xfs_repair.c | 6 + 7 files changed, 278 insertions(+), 17 deletions(-) diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h index 5d73111b508..ce5eb27c1fd 100644 --- a/libxfs/libxfs_api_defs.h +++ b/libxfs/libxfs_api_defs.h @@ -56,8 +56,13 @@ #define xfs_btree_bload libxfs_btree_bload #define xfs_btree_bload_compute_geometry libxfs_btree_bload_compute_geometry #define xfs_btree_del_cursor libxfs_btree_del_cursor +#define xfs_btree_get_block libxfs_btree_get_block +#define xfs_btree_goto_left_edge libxfs_btree_goto_left_edge +#define xfs_btree_increment libxfs_btree_increment #define xfs_btree_init_block libxfs_btree_init_block +#define xfs_btree_mem_head_read_buf libxfs_btree_mem_head_read_buf #define xfs_btree_rec_addr libxfs_btree_rec_addr +#define xfs_btree_visit_blocks libxfs_btree_visit_blocks #define xfs_buf_delwri_submit libxfs_buf_delwri_submit #define xfs_buf_get libxfs_buf_get #define xfs_buf_get_uncached libxfs_buf_get_uncached @@ -170,6 +175,8 @@ #define xfs_rmapbt_init_cursor libxfs_rmapbt_init_cursor #define xfs_rmapbt_maxlevels_ondisk libxfs_rmapbt_maxlevels_ondisk #define xfs_rmapbt_maxrecs libxfs_rmapbt_maxrecs +#define xfs_rmapbt_mem_create libxfs_rmapbt_mem_create +#define xfs_rmapbt_mem_cursor libxfs_rmapbt_mem_cursor #define xfs_rmapbt_stage_cursor libxfs_rmapbt_stage_cursor #define xfs_rmap_compare libxfs_rmap_compare #define xfs_rmap_get_rec libxfs_rmap_get_rec @@ -178,6 +185,7 @@ #define xfs_rmap_irec_offset_unpack libxfs_rmap_irec_offset_unpack #define xfs_rmap_lookup_le libxfs_rmap_lookup_le #define xfs_rmap_lookup_le_range libxfs_rmap_lookup_le_range +#define xfs_rmap_map_raw libxfs_rmap_map_raw #define xfs_rmap_query_all libxfs_rmap_query_all #define xfs_rmap_query_range libxfs_rmap_query_range @@ -225,6 +233,7 @@ #define xfs_validate_stripe_geometry libxfs_validate_stripe_geometry #define xfs_verify_agbno libxfs_verify_agbno +#define xfs_verify_agbext libxfs_verify_agbext #define xfs_verify_agino libxfs_verify_agino #define xfs_verify_cksum libxfs_verify_cksum #define xfs_verify_dir_ino libxfs_verify_dir_ino diff --git a/repair/agbtree.c b/repair/agbtree.c index 23851f17b61..26e282d57c8 100644 --- a/repair/agbtree.c +++ b/repair/agbtree.c @@ -100,7 +100,8 @@ reserve_agblocks( do_error(_("could not set up btree reservation: %s\n"), strerror(-error)); - error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len, + error = rmap_add_agbtree_mapping(mp, agno, + ext_ptr->ex_startblock, len, btr->newbt.oinfo.oi_owner); if (error) do_error(_("could not set up btree rmaps: %s\n"), @@ -593,14 +594,19 @@ get_rmapbt_records( unsigned int nr_wanted, void *priv) { - struct xfs_rmap_irec *rec; struct bt_rebuild *btr = priv; union xfs_btree_rec *block_rec; unsigned int loaded; + int ret; for (loaded = 0; loaded < nr_wanted; loaded++, idx++) { - rec = pop_slab_cursor(btr->slab_cursor); - memcpy(&cur->bc_rec.r, rec, sizeof(struct xfs_rmap_irec)); + ret = rmap_get_mem_rec(&btr->rmapbt_cursor, &cur->bc_rec.r); + if (ret < 0) + return ret; + if (ret == 0) + do_error( + _("ran out of records while rebuilding AG %u rmap btree\n"), + cur->bc_ag.pag->pag_agno); block_rec = libxfs_btree_rec_addr(cur, idx, block); cur->bc_ops->init_rec_from_cur(cur, block_rec); @@ -648,7 +654,7 @@ build_rmap_tree( { int error; - error = rmap_init_cursor(agno, &btr->slab_cursor); + error = rmap_init_mem_cursor(sc->mp, NULL, agno, &btr->rmapbt_cursor); if (error) do_error( _("Insufficient memory to construct rmap cursor.\n")); @@ -661,7 +667,7 @@ _("Error %d while creating rmap btree for AG %u.\n"), error, agno); /* Since we're not writing the AGF yet, no need to commit the cursor */ libxfs_btree_del_cursor(btr->cur, 0); - free_slab_cursor(&btr->slab_cursor); + rmap_free_mem_cursor(NULL, &btr->rmapbt_cursor, 0); } /* rebuild the refcount tree */ diff --git a/repair/agbtree.h b/repair/agbtree.h index 84f7083de20..4fb8e82e03e 100644 --- a/repair/agbtree.h +++ b/repair/agbtree.h @@ -20,6 +20,7 @@ struct bt_rebuild { /* Tree-specific data. */ union { struct xfs_slab_cursor *slab_cursor; + struct rmap_mem_cur rmapbt_cursor; struct { struct extent_tree_node *bno_rec; unsigned int freeblks; diff --git a/repair/phase5.c b/repair/phase5.c index 40f991b6626..361e5649b29 100644 --- a/repair/phase5.c +++ b/repair/phase5.c @@ -703,7 +703,7 @@ phase5(xfs_mount_t *mp) * the superblock counters. */ for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { - error = rmap_store_ag_btree_rec(mp, agno); + error = rmap_commit_agbtree_mappings(mp, agno); if (error) do_error( _("unable to add AG %u reverse-mapping data to btree.\n"), agno); diff --git a/repair/rmap.c b/repair/rmap.c index 5fbae50d5b7..64a5786faca 100644 --- a/repair/rmap.c +++ b/repair/rmap.c @@ -13,6 +13,8 @@ #include "slab.h" #include "rmap.h" #include "libfrog/bitmap.h" +#include "libxfs/xfile.h" +#include "libxfs/xfbtree.h" #undef RMAP_DEBUG @@ -24,6 +26,7 @@ /* per-AG rmap object anchor */ struct xfs_ag_rmap { + struct xfbtree *ar_xfbtree; /* rmap observations */ struct xfs_slab *ar_rmaps; /* rmap observations, p4 */ struct xfs_slab *ar_raw_rmaps; /* unmerged rmaps */ int ar_flcount; /* agfl entries from leftover */ @@ -53,6 +56,65 @@ rmap_needs_work( xfs_has_rmapbt(mp); } +/* Destroy an in-memory rmap btree. */ +STATIC void +rmaps_destroy( + struct xfs_mount *mp, + struct xfs_ag_rmap *ag_rmap) +{ + struct xfile *xfile; + struct xfs_buftarg *target; + + free_slab(&ag_rmap->ar_refcount_items); + + if (!ag_rmap->ar_xfbtree) + return; + + target = ag_rmap->ar_xfbtree->target; + xfile = target->bt_xfile; + + xfbtree_destroy(ag_rmap->ar_xfbtree); + libxfs_buftarg_free(target); + xfile_destroy(xfile); +} + +/* Initialize the in-memory rmap btree for collecting per-AG rmap records. */ +STATIC void +rmaps_init_ag( + struct xfs_mount *mp, + xfs_agnumber_t agno, + struct xfs_ag_rmap *ag_rmap) +{ + struct xfile *xfile; + struct xfs_buftarg *target; + unsigned long long maxbytes; + int error; + + maxbytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_agblocks); + error = xfile_create(mp, maxbytes, "rmapbt repair", &xfile); + if (error) + goto nomem; + + error = -libxfs_alloc_memory_buftarg(mp, xfile, &target); + if (error) + goto nomem; + + error = -libxfs_rmapbt_mem_create(mp, agno, target, + &ag_rmap->ar_xfbtree); + if (error) + goto nomem; + + error = init_slab(&ag_rmap->ar_refcount_items, + sizeof(struct xfs_refcount_irec)); + if (error) + goto nomem; + + return; +nomem: + do_error( +_("Insufficient memory while allocating realtime reverse mapping btree.")); +} + /* * Initialize per-AG reverse map data. */ @@ -71,6 +133,8 @@ rmaps_init( do_error(_("couldn't allocate per-AG reverse map roots\n")); for (i = 0; i < mp->m_sb.sb_agcount; i++) { + rmaps_init_ag(mp, i, &ag_rmaps[i]); + error = init_slab(&ag_rmaps[i].ar_rmaps, sizeof(struct xfs_rmap_irec)); if (error) @@ -82,11 +146,6 @@ _("Insufficient memory while allocating reverse mapping slabs.")); do_error( _("Insufficient memory while allocating raw metadata reverse mapping slabs.")); ag_rmaps[i].ar_last_rmap.rm_owner = XFS_RMAP_OWN_UNKNOWN; - error = init_slab(&ag_rmaps[i].ar_refcount_items, - sizeof(struct xfs_refcount_irec)); - if (error) - do_error( -_("Insufficient memory while allocating refcount item slabs.")); } } @@ -105,7 +164,7 @@ rmaps_free( for (i = 0; i < mp->m_sb.sb_agcount; i++) { free_slab(&ag_rmaps[i].ar_rmaps); free_slab(&ag_rmaps[i].ar_raw_rmaps); - free_slab(&ag_rmaps[i].ar_refcount_items); + rmaps_destroy(mp, &ag_rmaps[i]); } free(ag_rmaps); ag_rmaps = NULL; @@ -136,6 +195,103 @@ rmaps_are_mergeable( return r1->rm_offset + r1->rm_blockcount == r2->rm_offset; } +int +rmap_init_mem_cursor( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + struct rmap_mem_cur *rmcur) +{ + struct xfbtree *xfbt; + struct xfs_perag *pag; + int error; + + xfbt = ag_rmaps[agno].ar_xfbtree; + error = -xfbtree_head_read_buf(xfbt, tp, &rmcur->mhead_bp); + if (error) + return error; + + pag = libxfs_perag_get(mp, agno); + rmcur->mcur = libxfs_rmapbt_mem_cursor(pag, tp, rmcur->mhead_bp, xfbt); + + error = -libxfs_btree_goto_left_edge(rmcur->mcur); + if (error) + rmap_free_mem_cursor(tp, rmcur, error); + + libxfs_perag_put(pag); + return error; +} + +void +rmap_free_mem_cursor( + struct xfs_trans *tp, + struct rmap_mem_cur *rmcur, + int error) +{ + libxfs_btree_del_cursor(rmcur->mcur, error); + libxfs_trans_brelse(tp, rmcur->mhead_bp); + rmcur->mcur = NULL; + rmcur->mhead_bp = NULL; +} + +/* + * Retrieve the next record from the in-memory rmap btree. Returns 1 if irec + * has been filled out, 0 if there aren't any more records, or a negative errno + * value if an error happened. + */ +int +rmap_get_mem_rec( + struct rmap_mem_cur *rmcur, + struct xfs_rmap_irec *irec) +{ + int stat = 0; + int error; + + error = -libxfs_btree_increment(rmcur->mcur, 0, &stat); + if (error) + return -error; + if (!stat) + return 0; + + error = -libxfs_rmap_get_rec(rmcur->mcur, irec, &stat); + if (error) + return -error; + + return stat; +} + +static void +rmap_add_mem_rec( + struct xfs_mount *mp, + xfs_agnumber_t agno, + struct xfs_rmap_irec *rmap) +{ + struct rmap_mem_cur rmcur; + struct xfbtree *xfbt; + struct xfs_trans *tp; + int error; + + xfbt = ag_rmaps[agno].ar_xfbtree; + error = -libxfs_trans_alloc_empty(mp, &tp); + if (error) + do_error(_("allocating tx for in-memory rmap update\n")); + + error = rmap_init_mem_cursor(mp, tp, agno, &rmcur); + if (error) + do_error(_("reading in-memory rmap btree head\n")); + + error = -libxfs_rmap_map_raw(rmcur.mcur, rmap); + if (error) + do_error(_("adding rmap to in-memory btree, err %d\n"), error); + rmap_free_mem_cursor(tp, &rmcur, 0); + + error = xfbtree_trans_commit(xfbt, tp); + if (error) + do_error(_("committing in-memory rmap record\n")); + + libxfs_trans_cancel(tp); +} + /* * Add an observation about a block mapping in an inode's data or attribute * fork for later btree reconstruction. @@ -173,6 +329,9 @@ rmap_add_rec( rmap.rm_blockcount = irec->br_blockcount; if (irec->br_state == XFS_EXT_UNWRITTEN) rmap.rm_flags |= XFS_RMAP_UNWRITTEN; + + rmap_add_mem_rec(mp, agno, &rmap); + last_rmap = &ag_rmaps[agno].ar_last_rmap; if (last_rmap->rm_owner == XFS_RMAP_OWN_UNKNOWN) *last_rmap = rmap; @@ -223,6 +382,8 @@ __rmap_add_raw_rec( rmap.rm_flags |= XFS_RMAP_BMBT_BLOCK; rmap.rm_startblock = agbno; rmap.rm_blockcount = len; + + rmap_add_mem_rec(mp, agno, &rmap); return slab_add(ag_rmaps[agno].ar_raw_rmaps, &rmap); } @@ -273,6 +434,36 @@ rmap_add_ag_rec( return __rmap_add_raw_rec(mp, agno, agbno, len, owner, false, false); } +/* + * Add a reverse mapping for a per-AG btree extent. These are /not/ tracked + * in the in-memory rmap btree because they can only be added to the rmap + * data after the in-memory btrees have been written to disk. + */ +int +rmap_add_agbtree_mapping( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_agblock_t agbno, + xfs_extlen_t len, + uint64_t owner) +{ + struct xfs_rmap_irec rmap = { + .rm_owner = owner, + .rm_startblock = agbno, + .rm_blockcount = len, + }; + struct xfs_perag *pag; + + if (!rmap_needs_work(mp)) + return 0; + + pag = libxfs_perag_get(mp, agno); + assert(libxfs_verify_agbext(pag, agbno, len)); + libxfs_perag_put(pag); + + return slab_add(ag_rmaps[agno].ar_raw_rmaps, &rmap); +} + /* * Merge adjacent raw rmaps and add them to the main rmap list. */ @@ -441,7 +632,7 @@ rmap_add_fixed_ag_rec( * the rmapbt, after which it is fully regenerated. */ int -rmap_store_ag_btree_rec( +rmap_commit_agbtree_mappings( struct xfs_mount *mp, xfs_agnumber_t agno) { @@ -536,7 +727,7 @@ rmap_store_ag_btree_rec( if (error) goto err; - /* Create cursors to refcount structures */ + /* Create cursors to rmap structures */ error = init_slab_cursor(ag_rmap->ar_rmaps, rmap_compare, &rm_cur); if (error) goto err; @@ -870,6 +1061,21 @@ compute_refcounts( } #undef RMAP_END +static int +count_btree_records( + struct xfs_btree_cur *cur, + int level, + void *data) +{ + uint64_t *nr = data; + struct xfs_btree_block *block; + struct xfs_buf *bp; + + block = libxfs_btree_get_block(cur, level, &bp); + *nr += be16_to_cpu(block->bb_numrecs); + return 0; +} + /* * Return the number of rmap objects for an AG. */ @@ -878,7 +1084,26 @@ rmap_record_count( struct xfs_mount *mp, xfs_agnumber_t agno) { - return slab_count(ag_rmaps[agno].ar_rmaps); + struct rmap_mem_cur rmcur; + uint64_t nr = 0; + int error; + + if (ag_rmaps[agno].ar_xfbtree == NULL) + return 0; + + error = rmap_init_mem_cursor(mp, NULL, agno, &rmcur); + if (error) + do_error(_("%s while reading in-memory rmap btree\n"), + strerror(error)); + + error = -libxfs_btree_visit_blocks(rmcur.mcur, count_btree_records, + XFS_BTREE_VISIT_RECORDS, &nr); + if (error) + do_error(_("%s while counting in-memory rmap records\n"), + strerror(error)); + + rmap_free_mem_cursor(NULL, &rmcur, 0); + return nr; } /* diff --git a/repair/rmap.h b/repair/rmap.h index 782256f8b7e..d8eec58ab8d 100644 --- a/repair/rmap.h +++ b/repair/rmap.h @@ -24,7 +24,10 @@ extern int rmap_fold_raw_recs(struct xfs_mount *mp, xfs_agnumber_t agno); extern bool rmaps_are_mergeable(struct xfs_rmap_irec *r1, struct xfs_rmap_irec *r2); extern int rmap_add_fixed_ag_rec(struct xfs_mount *, xfs_agnumber_t); -extern int rmap_store_ag_btree_rec(struct xfs_mount *, xfs_agnumber_t); + +int rmap_add_agbtree_mapping(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno, xfs_extlen_t len, uint64_t owner); +int rmap_commit_agbtree_mappings(struct xfs_mount *mp, xfs_agnumber_t agno); uint64_t rmap_record_count(struct xfs_mount *mp, xfs_agnumber_t agno); extern int rmap_init_cursor(xfs_agnumber_t, struct xfs_slab_cursor **); @@ -49,4 +52,15 @@ extern int fix_inode_reflink_flags(struct xfs_mount *, xfs_agnumber_t); extern void fix_freelist(struct xfs_mount *, xfs_agnumber_t, bool); extern void rmap_store_agflcount(struct xfs_mount *, xfs_agnumber_t, int); +struct rmap_mem_cur { + struct xfs_btree_cur *mcur; + struct xfs_buf *mhead_bp; +}; + +int rmap_init_mem_cursor(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_agnumber_t agno, struct rmap_mem_cur *rmcur); +void rmap_free_mem_cursor(struct xfs_trans *tp, struct rmap_mem_cur *rmcur, + int error); +int rmap_get_mem_rec(struct rmap_mem_cur *rmcur, struct xfs_rmap_irec *irec); + #endif /* RMAP_H_ */ diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c index e49d4292ad4..251a46d11fe 100644 --- a/repair/xfs_repair.c +++ b/repair/xfs_repair.c @@ -913,6 +913,12 @@ repair_capture_writeback( struct xfs_mount *mp = bp->b_mount; static pthread_mutex_t wb_mutex = PTHREAD_MUTEX_INITIALIZER; + /* We only care about ondisk metadata. */ + if (bp->b_target != mp->m_ddev_targp && + bp->b_target != mp->m_logdev_targp && + bp->b_target != mp->m_rtdev_targp) + return; + /* * This write hook ignores any buffer that looks like a superblock to * avoid hook recursion when setting NEEDSREPAIR. Higher level code