From: Darrick J. Wong <darrick.wong@xxxxxxxxxx> Create some new support structures and functions to assist phase5 in using the btree bulk loader to reconstruct metadata btrees. This is the first step in removing the open-coded rebuilding code. Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- repair/phase5.c | 239 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 218 insertions(+), 21 deletions(-) diff --git a/repair/phase5.c b/repair/phase5.c index 84c05a13..8f5e5f59 100644 --- a/repair/phase5.c +++ b/repair/phase5.c @@ -18,6 +18,7 @@ #include "progress.h" #include "slab.h" #include "rmap.h" +#include "bload.h" /* * we maintain the current slice (path from root to leaf) @@ -65,6 +66,23 @@ typedef struct bt_status { uint64_t owner; /* owner */ } bt_status_t; +/* Context for rebuilding a per-AG btree. */ +struct bt_rebuild { + /* Fake root for staging and space preallocations. */ + struct xrep_newbt newbt; + + /* Geometry of the new btree. */ + struct xfs_btree_bload bload; + + /* Staging btree cursor for the new tree. */ + struct xfs_btree_cur *cur; + + /* Tree-specific data. */ + union { + struct xfs_slab_cursor *slab_cursor; + }; +}; + /* * extra metadata for the agi */ @@ -306,6 +324,156 @@ _("error - not enough free space in filesystem\n")); #endif } +/* + * Estimate proper slack values for a btree that's being reloaded. + * + * Under most circumstances, we'll take whatever default loading value the + * btree bulk loading code calculates for us. However, there are some + * exceptions to this rule: + * + * (1) If someone turned one of the debug knobs. + * (2) The AG has less than ~9% space free. + * + * Note that we actually use 3/32 for the comparison to avoid division. + */ +static void +estimate_ag_bload_slack( + struct repair_ctx *sc, + struct xfs_btree_bload *bload, + unsigned int free) +{ + /* + * The global values are set to -1 (i.e. take the bload defaults) + * unless someone has set them otherwise, so we just pull the values + * here. + */ + bload->leaf_slack = bload_leaf_slack; + bload->node_slack = bload_node_slack; + + /* No further changes if there's more than 3/32ths space left. */ + if (free >= ((sc->mp->m_sb.sb_agblocks * 3) >> 5)) + return; + + /* We're low on space; load the btrees as tightly as possible. */ + if (bload->leaf_slack < 0) + bload->leaf_slack = 0; + if (bload->node_slack < 0) + bload->node_slack = 0; +} + +/* Initialize a btree rebuild context. */ +static void +init_rebuild( + struct repair_ctx *sc, + const struct xfs_owner_info *oinfo, + xfs_agblock_t free_space, + struct bt_rebuild *btr) +{ + memset(btr, 0, sizeof(struct bt_rebuild)); + + xrep_newbt_init_bare(&btr->newbt, sc); + btr->newbt.oinfo = *oinfo; /* struct copy */ + estimate_ag_bload_slack(sc, &btr->bload, free_space); +} + +/* + * Update this free space record to reflect the blocks we stole from the + * beginning of the record. + */ +static void +consume_freespace( + xfs_agnumber_t agno, + struct extent_tree_node *ext_ptr, + uint32_t len) +{ + struct extent_tree_node *bno_ext_ptr; + xfs_agblock_t new_start = ext_ptr->ex_startblock + len; + xfs_extlen_t new_len = ext_ptr->ex_blockcount - len; + + /* Delete the used-up extent from both extent trees. */ +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, "releasing extent: %u [%u %u]\n", agno, + ext_ptr->ex_startblock, ext_ptr->ex_blockcount); +#endif + bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock); + ASSERT(bno_ext_ptr != NULL); + get_bno_extent(agno, bno_ext_ptr); + release_extent_tree_node(bno_ext_ptr); + + ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock, + ext_ptr->ex_blockcount); + release_extent_tree_node(ext_ptr); + + /* + * If we only used part of this last extent, then we must reinsert the + * extent in the extent trees. + */ + if (new_len > 0) { + add_bno_extent(agno, new_start, new_len); + add_bcnt_extent(agno, new_start, new_len); + } +} + +/* Reserve blocks for the new btree. */ +static void +setup_rebuild( + struct xfs_mount *mp, + xfs_agnumber_t agno, + struct bt_rebuild *btr, + uint32_t nr_blocks) +{ + struct extent_tree_node *ext_ptr; + uint32_t blocks_allocated = 0; + uint32_t len; + int error; + + while (blocks_allocated < nr_blocks) { + /* + * Grab the smallest extent and use it up, then get the + * next smallest. This mimics the init_*_cursor code. + */ + ext_ptr = findfirst_bcnt_extent(agno); + if (!ext_ptr) + do_error( +_("error - not enough free space in filesystem\n")); + + /* Use up the extent we've got. */ + len = min(ext_ptr->ex_blockcount, nr_blocks - blocks_allocated); + error = xrep_newbt_add_blocks(&btr->newbt, + XFS_AGB_TO_FSB(mp, agno, + ext_ptr->ex_startblock), + len); + if (error) + do_error(_("could not set up btree reservation: %s\n"), + strerror(-error)); + + error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len, + btr->newbt.oinfo.oi_owner); + if (error) + do_error(_("could not set up btree rmaps: %s\n"), + strerror(-error)); + + consume_freespace(agno, ext_ptr, len); + blocks_allocated += len; + } +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, "blocks_allocated = %d\n", + blocks_allocated); +#endif +} + +/* Feed one of the new btree blocks to the bulk loader. */ +static int +rebuild_claim_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + void *priv) +{ + struct bt_rebuild *btr = priv; + + return xrep_newbt_claim_block(cur, &btr->newbt, ptr); +} + static void write_cursor(bt_status_t *curs) { @@ -336,6 +504,34 @@ finish_cursor(bt_status_t *curs) free(curs->btree_blocks); } +/* + * Scoop up leftovers from a rebuild cursor for later freeing, then free the + * rebuild context. + */ +static void +finish_rebuild( + struct xfs_mount *mp, + struct bt_rebuild *btr, + struct xfs_slab *lost_fsb) +{ + struct xrep_newbt_resv *resv, *n; + + for_each_xrep_newbt_reservation(&btr->newbt, resv, n) { + while (resv->used < resv->len) { + xfs_fsblock_t fsb = resv->fsbno + resv->used; + int error; + + error = slab_add(lost_fsb, &fsb); + if (error) + do_error( +_("Insufficient memory saving lost blocks.\n")); + resv->used++; + } + } + + xrep_newbt_destroy(&btr->newbt, 0); +} + /* * We need to leave some free records in the tree for the corner case of * setting up the AGFL. This may require allocation of blocks, and as @@ -2287,28 +2483,29 @@ keep_fsinos(xfs_mount_t *mp) static void phase5_func( - xfs_mount_t *mp, - xfs_agnumber_t agno, - struct xfs_slab *lost_fsb) + struct xfs_mount *mp, + xfs_agnumber_t agno, + struct xfs_slab *lost_fsb) { - uint64_t num_inos; - uint64_t num_free_inos; - uint64_t finobt_num_inos; - uint64_t finobt_num_free_inos; - bt_status_t bno_btree_curs; - bt_status_t bcnt_btree_curs; - bt_status_t ino_btree_curs; - bt_status_t fino_btree_curs; - bt_status_t rmap_btree_curs; - bt_status_t refcnt_btree_curs; - int extra_blocks = 0; - uint num_freeblocks; - xfs_extlen_t freeblks1; + struct repair_ctx sc = { .mp = mp, }; + struct agi_stat agi_stat = {0,}; + uint64_t num_inos; + uint64_t num_free_inos; + uint64_t finobt_num_inos; + uint64_t finobt_num_free_inos; + bt_status_t bno_btree_curs; + bt_status_t bcnt_btree_curs; + bt_status_t ino_btree_curs; + bt_status_t fino_btree_curs; + bt_status_t rmap_btree_curs; + bt_status_t refcnt_btree_curs; + int extra_blocks = 0; + uint num_freeblocks; + xfs_extlen_t freeblks1; #ifdef DEBUG - xfs_extlen_t freeblks2; + xfs_extlen_t freeblks2; #endif - xfs_agblock_t num_extents; - struct agi_stat agi_stat = {0,}; + xfs_agblock_t num_extents; if (verbose) do_log(_(" - agno = %d\n"), agno); @@ -2516,8 +2713,8 @@ inject_lost_blocks( if (error) goto out_cancel; - error = -libxfs_free_extent(tp, *fsb, 1, &XFS_RMAP_OINFO_AG, - XFS_AG_RESV_NONE); + error = -libxfs_free_extent(tp, *fsb, 1, + &XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE); if (error) goto out_cancel;