On 06/24/2018 12:24 PM, Darrick J. Wong wrote:
From: Darrick J. Wong <darrick.wong@xxxxxxxxxx> Rebuild the free space btrees from the gaps in the rmap btree. Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- fs/xfs/Makefile | 1 fs/xfs/scrub/alloc.c | 1 fs/xfs/scrub/alloc_repair.c | 561 +++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/common.c | 8 + fs/xfs/scrub/repair.h | 2 fs/xfs/scrub/scrub.c | 4 fs/xfs/xfs_extent_busy.c | 14 + fs/xfs/xfs_extent_busy.h | 4 8 files changed, 591 insertions(+), 4 deletions(-) create mode 100644 fs/xfs/scrub/alloc_repair.c diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index a36cccbec169..841e0824eeb6 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -164,6 +164,7 @@ xfs-$(CONFIG_XFS_QUOTA) += scrub/quota.o ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y) xfs-y += $(addprefix scrub/, \ agheader_repair.o \ + alloc_repair.o \ repair.o \ ) endif diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 50e4f7fa06f0..e2514c84cb7a 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -15,7 +15,6 @@ #include "xfs_log_format.h" #include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_alloc.h" #include "xfs_rmap.h" #include "xfs_alloc.h" #include "scrub/xfs_scrub.h" diff --git a/fs/xfs/scrub/alloc_repair.c b/fs/xfs/scrub/alloc_repair.c new file mode 100644 index 000000000000..c25a2b0d71f1 --- /dev/null +++ b/fs/xfs/scrub/alloc_repair.c @@ -0,0 +1,561 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2018 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@xxxxxxxxxx> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_alloc.h" +#include "xfs_alloc_btree.h" +#include "xfs_rmap.h" +#include "xfs_rmap_btree.h" +#include "xfs_inode.h" +#include "xfs_refcount.h" +#include "xfs_extent_busy.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/btree.h" +#include "scrub/trace.h" +#include "scrub/repair.h" + +/* + * Free Space Btree Repair + * ======================= + * + * The reverse mappings are supposed to record all space usage for the entire + * AG. Therefore, we can recalculate the free extents in an AG by looking for + * gaps in the physical extents recorded in the rmapbt. On a reflink + * filesystem this is a little more tricky in that we have to be aware that + * the rmap records are allowed to overlap. + * + * We derive which blocks belonged to the old bnobt/cntbt by recording all the + * OWN_AG extents and subtracting out the blocks owned by all other OWN_AG + * metadata: the rmapbt blocks visited while iterating the reverse mappings + * and the AGFL blocks. + * + * Once we have both of those pieces, we can reconstruct the bnobt and cntbt + * by blowing out the free block state and freeing all the extents that we + * found. This adds the requirement that we can't have any busy extents in + * the AG because the busy code cannot handle duplicate records. + * + * Note that we can only rebuild both free space btrees at the same time + * because the regular extent freeing infrastructure loads both btrees at the + * same time. + */ + +struct xfs_repair_alloc_extent { + struct list_head list; + xfs_agblock_t bno; + xfs_extlen_t len; +}; + +struct xfs_repair_alloc { + struct xfs_repair_extent_list nobtlist; /* rmapbt/agfl blocks */ + struct xfs_repair_extent_list *btlist; /* OWN_AG blocks */ + struct list_head *extlist; /* free extents */ + struct xfs_scrub_context *sc; + uint64_t nr_records; /* length of extlist */ + xfs_agblock_t next_bno; /* next bno we want to see */ + xfs_agblock_t nr_blocks; /* free blocks in extlist */
Align the comments on the right to a common column?
+}; + +/* Record extents that aren't in use from gaps in the rmap records. */ +STATIC int +xfs_repair_alloc_extent_fn( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) +{ + struct xfs_repair_alloc *ra = priv; + struct xfs_repair_alloc_extent *rae; + xfs_fsblock_t fsb; + int error; + + /* Record all the OWN_AG blocks... */ + if (rec->rm_owner == XFS_RMAP_OWN_AG) { + fsb = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, + rec->rm_startblock); + error = xfs_repair_collect_btree_extent(ra->sc, + ra->btlist, fsb, rec->rm_blockcount); + if (error) + return error; + } + + /* ...and all the rmapbt blocks... */ + error = xfs_repair_collect_btree_cur_blocks(ra->sc, cur, + xfs_repair_collect_btree_cur_blocks_in_extent_list, + &ra->nobtlist); + if (error) + return error; + + /* ...and all the free space. */ + if (rec->rm_startblock > ra->next_bno) { + trace_xfs_repair_alloc_extent_fn(cur->bc_mp, + cur->bc_private.a.agno, + ra->next_bno, rec->rm_startblock - ra->next_bno, + XFS_RMAP_OWN_NULL, 0, 0); + + rae = kmem_alloc(sizeof(struct xfs_repair_alloc_extent), + KM_MAYFAIL); + if (!rae) + return -ENOMEM; + INIT_LIST_HEAD(&rae->list); + rae->bno = ra->next_bno; + rae->len = rec->rm_startblock - ra->next_bno; + list_add_tail(&rae->list, ra->extlist); + ra->nr_records++; + ra->nr_blocks += rae->len; + } + ra->next_bno = max_t(xfs_agblock_t, ra->next_bno, + rec->rm_startblock + rec->rm_blockcount); + return 0; +}
Alrighty, seems to follow the commentary. Thx!
+ +/* Collect an AGFL block for the not-to-release list. */ +static int +xfs_repair_collect_agfl_block( + struct xfs_mount *mp, + xfs_agblock_t bno, + void *priv) +{ + struct xfs_repair_alloc *ra = priv; + xfs_fsblock_t fsb; + + fsb = XFS_AGB_TO_FSB(mp, ra->sc->sa.agno, bno); + return xfs_repair_collect_btree_extent(ra->sc, &ra->nobtlist, fsb, 1); +} + +/* Compare two btree extents. */ +static int +xfs_repair_allocbt_extent_cmp( + void *priv, + struct list_head *a, + struct list_head *b) +{ + struct xfs_repair_alloc_extent *ap; + struct xfs_repair_alloc_extent *bp; + + ap = container_of(a, struct xfs_repair_alloc_extent, list); + bp = container_of(b, struct xfs_repair_alloc_extent, list); + + if (ap->bno > bp->bno) + return 1; + else if (ap->bno < bp->bno) + return -1; + return 0; +} + +/* Put an extent onto the free list. */ +STATIC int +xfs_repair_allocbt_free_extent(
While on the topic of name shortening, I've noticed other places in the code shorten "extent" to "ext", and it seems pretty readable. Just a suggestion if it helps :-)
+ struct xfs_scrub_context *sc, + xfs_fsblock_t fsbno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo) +{ + int error; + + error = xfs_free_extent(sc->tp, fsbno, len, oinfo, 0); + if (error) + return error; + error = xfs_repair_roll_ag_trans(sc); + if (error) + return error; + return xfs_mod_fdblocks(sc->mp, -(int64_t)len, false); +} + +/* Find the longest free extent in the list. */ +static struct xfs_repair_alloc_extent * +xfs_repair_allocbt_get_longest( + struct list_head *free_extents) +{ + struct xfs_repair_alloc_extent *rae; + struct xfs_repair_alloc_extent *res = NULL; + + list_for_each_entry(rae, free_extents, list) { + if (!res || rae->len > res->len) + res = rae; + } + return res; +} + +/* Find the shortest free extent in the list. */ +static struct xfs_repair_alloc_extent * +xfs_repair_allocbt_get_shortest( + struct list_head *free_extents) +{ + struct xfs_repair_alloc_extent *rae; + struct xfs_repair_alloc_extent *res = NULL; + + list_for_each_entry(rae, free_extents, list) { + if (!res || rae->len < res->len) + res = rae; + if (res->len == 1) + break; + } + return res; +} + +/* + * Allocate a block from the (cached) shortest extent in the AG. In theory + * this should never fail, since we already checked that there was enough + * space to handle the new btrees. + */ +STATIC xfs_fsblock_t +xfs_repair_allocbt_alloc_block( + struct xfs_scrub_context *sc, + struct list_head *free_extents, + struct xfs_repair_alloc_extent **cached_result) +{ + struct xfs_repair_alloc_extent *ext = *cached_result; + xfs_fsblock_t fsb; + + /* No cached result, see if we can find another. */ + if (!ext) { + ext = xfs_repair_allocbt_get_shortest(free_extents); + ASSERT(ext); + if (!ext) + return NULLFSBLOCK; + } + + /* Subtract one block. */ + fsb = XFS_AGB_TO_FSB(sc->mp, sc->sa.agno, ext->bno); + ext->bno++; + ext->len--; + if (ext->len == 0) { + list_del(&ext->list); + kmem_free(ext); + ext = NULL; + } + + *cached_result = ext; + return fsb; +} + +/* Free every record in the extent list. */ +STATIC void +xfs_repair_allocbt_cancel_freelist( + struct list_head *extlist) +{ + struct xfs_repair_alloc_extent *rae; + struct xfs_repair_alloc_extent *n; + + list_for_each_entry_safe(rae, n, extlist, list) { + list_del(&rae->list); + kmem_free(rae); + } +} + +/* + * Iterate all reverse mappings to find (1) the free extents, (2) the OWN_AG + * extents, (3) the rmapbt blocks, and (4) the AGFL blocks. The free space is + * (1) + (2) - (3) - (4). Figure out if we have enough free space to + * reconstruct the free space btrees. Caller must clean up the input lists + * if something goes wrong. + */ +STATIC int +xfs_repair_allocbt_find_freespace( + struct xfs_scrub_context *sc, + struct list_head *free_extents, + struct xfs_repair_extent_list *old_allocbt_blocks) +{ + struct xfs_repair_alloc ra; + struct xfs_repair_alloc_extent *rae; + struct xfs_btree_cur *cur; + struct xfs_mount *mp = sc->mp; + xfs_agblock_t agend; + xfs_agblock_t nr_blocks; + int error; + + ra.extlist = free_extents; + ra.btlist = old_allocbt_blocks; + xfs_repair_init_extent_list(&ra.nobtlist); + ra.next_bno = 0; + ra.nr_records = 0; + ra.nr_blocks = 0; + ra.sc = sc; + + /* + * Iterate all the reverse mappings to find gaps in the physical + * mappings, all the OWN_AG blocks, and all the rmapbt extents. + */ + cur = xfs_rmapbt_init_cursor(mp, sc->tp, sc->sa.agf_bp, sc->sa.agno); + error = xfs_rmap_query_all(cur, xfs_repair_alloc_extent_fn, &ra); + if (error) + goto err; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + cur = NULL; + + /* Insert a record for space between the last rmap and EOAG. */ + agend = be32_to_cpu(XFS_BUF_TO_AGF(sc->sa.agf_bp)->agf_length); + if (ra.next_bno < agend) { + rae = kmem_alloc(sizeof(struct xfs_repair_alloc_extent), + KM_MAYFAIL); + if (!rae) { + error = -ENOMEM; + goto err; + } + INIT_LIST_HEAD(&rae->list); + rae->bno = ra.next_bno; + rae->len = agend - ra.next_bno; + list_add_tail(&rae->list, free_extents); + ra.nr_records++; + } + + /* Collect all the AGFL blocks. */ + error = xfs_agfl_walk(mp, XFS_BUF_TO_AGF(sc->sa.agf_bp), + sc->sa.agfl_bp, xfs_repair_collect_agfl_block, &ra); + if (error) + goto err; + + /* Do we actually have enough space to do this? */ + nr_blocks = 2 * xfs_allocbt_calc_size(mp, ra.nr_records); + if (!xfs_repair_ag_has_space(sc->sa.pag, nr_blocks, XFS_AG_RESV_NONE) || + ra.nr_blocks < nr_blocks) { + error = -ENOSPC; + goto err; + } + + /* Compute the old bnobt/cntbt blocks. */ + error = xfs_repair_subtract_extents(sc, old_allocbt_blocks, + &ra.nobtlist); + if (error) + goto err; + xfs_repair_cancel_btree_extents(sc, &ra.nobtlist); + return 0; + +err: + xfs_repair_cancel_btree_extents(sc, &ra.nobtlist); + if (cur) + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; +}
Ok, makes sense after some digging. I might not have figured out the 2 had Dave not pointed that out though. But for the most part the in body comments help a lot. Thx!
-- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at https://urldefense.proofpoint.com/v2/url?u=http-3A__vger.kernel.org_majordomo-2Dinfo.html&d=DwICaQ&c=RoP1YumCXCgaWHvlZYR8PZh8Bv7qIrMUB65eapI_JnE&r=LHZQ8fHvy6wDKXGTWcm97burZH5sQKHRDMaY1UthQxc&m=nNxagNoo077f7e1qascS_gP9gvh_A31xun9uDjsIiRw&s=pV06fEkJolQtBTE33dKzWHVyIvrswKx5pwP148R8jcs&e=
-- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html