From: Darrick J. Wong <darrick.wong@xxxxxxxxxx> Regenerate the AGF and AGFL from the rmap data. Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- fs/xfs/scrub/agheader_repair.c | 644 ++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/repair.c | 106 ++++++- fs/xfs/scrub/repair.h | 15 + fs/xfs/scrub/scrub.c | 4 fs/xfs/xfs_trans.c | 54 +++ fs/xfs/xfs_trans.h | 2 6 files changed, 814 insertions(+), 11 deletions(-) diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 117eedac53df..90e5e6cbc911 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -17,12 +17,18 @@ #include "xfs_sb.h" #include "xfs_inode.h" #include "xfs_alloc.h" +#include "xfs_alloc_btree.h" #include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h" #include "xfs_rmap.h" +#include "xfs_rmap_btree.h" +#include "xfs_refcount.h" +#include "xfs_refcount_btree.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" +#include "scrub/repair.h" /* Superblock */ @@ -54,3 +60,641 @@ xfs_repair_superblock( xfs_trans_log_buf(sc->tp, bp, 0, BBTOB(bp->b_length) - 1); return error; } + +/* AGF */ + +struct xfs_repair_agf_allocbt { + struct xfs_scrub_context *sc; + xfs_agblock_t freeblks; + xfs_agblock_t longest; +}; + +/* Record free space shape information. */ +STATIC int +xfs_repair_agf_walk_allocbt( + struct xfs_btree_cur *cur, + struct xfs_alloc_rec_incore *rec, + void *priv) +{ + struct xfs_repair_agf_allocbt *raa = priv; + int error = 0; + + if (xfs_scrub_should_terminate(raa->sc, &error)) + return error; + + raa->freeblks += rec->ar_blockcount; + if (rec->ar_blockcount > raa->longest) + raa->longest = rec->ar_blockcount; + return error; +} + +/* Does this AGFL block look sane? */ +STATIC int +xfs_repair_agf_check_agfl_block( + struct xfs_mount *mp, + xfs_agblock_t agbno, + void *priv) +{ + struct xfs_scrub_context *sc = priv; + + if (!xfs_verify_agbno(mp, sc->sa.agno, agbno)) + return -EFSCORRUPTED; + return 0; +} + +/* Information for finding AGF-rooted btrees */ +enum { + REPAIR_AGF_BNOBT = 0, + REPAIR_AGF_CNTBT, + REPAIR_AGF_RMAPBT, + REPAIR_AGF_REFCOUNTBT, + REPAIR_AGF_END, + REPAIR_AGF_MAX +}; + +static const struct xfs_repair_find_ag_btree repair_agf[] = { + [REPAIR_AGF_BNOBT] = { + .rmap_owner = XFS_RMAP_OWN_AG, + .buf_ops = &xfs_allocbt_buf_ops, + .magic = XFS_ABTB_CRC_MAGIC, + }, + [REPAIR_AGF_CNTBT] = { + .rmap_owner = XFS_RMAP_OWN_AG, + .buf_ops = &xfs_allocbt_buf_ops, + .magic = XFS_ABTC_CRC_MAGIC, + }, + [REPAIR_AGF_RMAPBT] = { + .rmap_owner = XFS_RMAP_OWN_AG, + .buf_ops = &xfs_rmapbt_buf_ops, + .magic = XFS_RMAP_CRC_MAGIC, + }, + [REPAIR_AGF_REFCOUNTBT] = { + .rmap_owner = XFS_RMAP_OWN_REFC, + .buf_ops = &xfs_refcountbt_buf_ops, + .magic = XFS_REFC_CRC_MAGIC, + }, + [REPAIR_AGF_END] = { + .buf_ops = NULL, + }, +}; + +/* + * Find the btree roots. This is /also/ a chicken and egg problem because we + * have to use the rmapbt (rooted in the AGF) to find the btrees rooted in the + * AGF. We also have no idea if the btrees make any sense. If we hit obvious + * corruptions in those btrees we'll bail out. + */ +STATIC int +xfs_repair_agf_find_btrees( + struct xfs_scrub_context *sc, + struct xfs_buf *agf_bp, + struct xfs_repair_find_ag_btree *fab, + struct xfs_buf *agfl_bp) +{ + struct xfs_agf *old_agf = XFS_BUF_TO_AGF(agf_bp); + int error; + + /* Go find the root data. 
*/ + memcpy(fab, repair_agf, sizeof(repair_agf)); + error = xfs_repair_find_ag_btree_roots(sc, agf_bp, fab, agfl_bp); + if (error) + return error; + + /* We must find the bnobt, cntbt, and rmapbt roots. */ + if (fab[REPAIR_AGF_BNOBT].root == NULLAGBLOCK || + fab[REPAIR_AGF_BNOBT].height > XFS_BTREE_MAXLEVELS || + fab[REPAIR_AGF_CNTBT].root == NULLAGBLOCK || + fab[REPAIR_AGF_CNTBT].height > XFS_BTREE_MAXLEVELS || + fab[REPAIR_AGF_RMAPBT].root == NULLAGBLOCK || + fab[REPAIR_AGF_RMAPBT].height > XFS_BTREE_MAXLEVELS) + return -EFSCORRUPTED; + + /* + * We relied on the rmapbt to reconstruct the AGF. If we get a + * different root then something's seriously wrong. + */ + if (fab[REPAIR_AGF_RMAPBT].root != + be32_to_cpu(old_agf->agf_roots[XFS_BTNUM_RMAPi])) + return -EFSCORRUPTED; + + /* We must find the refcountbt root if that feature is enabled. */ + if (xfs_sb_version_hasreflink(&sc->mp->m_sb) && + (fab[REPAIR_AGF_REFCOUNTBT].root == NULLAGBLOCK || + fab[REPAIR_AGF_REFCOUNTBT].height > XFS_BTREE_MAXLEVELS)) + return -EFSCORRUPTED; + + return 0; +} + +/* Set btree root information in an AGF. */ +STATIC void +xfs_repair_agf_set_roots( + struct xfs_scrub_context *sc, + struct xfs_agf *agf, + struct xfs_repair_find_ag_btree *fab) +{ + agf->agf_roots[XFS_BTNUM_BNOi] = + cpu_to_be32(fab[REPAIR_AGF_BNOBT].root); + agf->agf_levels[XFS_BTNUM_BNOi] = + cpu_to_be32(fab[REPAIR_AGF_BNOBT].height); + + agf->agf_roots[XFS_BTNUM_CNTi] = + cpu_to_be32(fab[REPAIR_AGF_CNTBT].root); + agf->agf_levels[XFS_BTNUM_CNTi] = + cpu_to_be32(fab[REPAIR_AGF_CNTBT].height); + + agf->agf_roots[XFS_BTNUM_RMAPi] = + cpu_to_be32(fab[REPAIR_AGF_RMAPBT].root); + agf->agf_levels[XFS_BTNUM_RMAPi] = + cpu_to_be32(fab[REPAIR_AGF_RMAPBT].height); + + if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) { + agf->agf_refcount_root = + cpu_to_be32(fab[REPAIR_AGF_REFCOUNTBT].root); + agf->agf_refcount_level = + cpu_to_be32(fab[REPAIR_AGF_REFCOUNTBT].height); + } +} + +/* + * Reinitialize the AGF header, making an in-core copy of the old contents so + * that we know which in-core state needs to be reinitialized. + */ +STATIC void +xfs_repair_agf_init_header( + struct xfs_scrub_context *sc, + struct xfs_buf *agf_bp, + struct xfs_agf *old_agf) +{ + struct xfs_mount *mp = sc->mp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agf_bp); + + memcpy(old_agf, agf, sizeof(*old_agf)); + memset(agf, 0, BBTOB(agf_bp->b_length)); + agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); + agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); + agf->agf_seqno = cpu_to_be32(sc->sa.agno); + agf->agf_length = cpu_to_be32(xfs_ag_block_count(mp, sc->sa.agno)); + agf->agf_flfirst = old_agf->agf_flfirst; + agf->agf_fllast = old_agf->agf_fllast; + agf->agf_flcount = old_agf->agf_flcount; + if (xfs_sb_version_hascrc(&mp->m_sb)) + uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); +} + +/* Update the AGF btree counters by walking the btrees. */ +STATIC int +xfs_repair_agf_update_btree_counters( + struct xfs_scrub_context *sc, + struct xfs_buf *agf_bp) +{ + struct xfs_repair_agf_allocbt raa = { .sc = sc }; + struct xfs_btree_cur *cur = NULL; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agf_bp); + struct xfs_mount *mp = sc->mp; + xfs_agblock_t btreeblks; + xfs_agblock_t blocks; + int error; + + /* Update the AGF counters from the bnobt. 
*/ + cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno, + XFS_BTNUM_BNO); + error = xfs_alloc_query_all(cur, xfs_repair_agf_walk_allocbt, &raa); + if (error) + goto err; + error = xfs_btree_count_blocks(cur, &blocks); + if (error) + goto err; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + btreeblks = blocks - 1; + agf->agf_freeblks = cpu_to_be32(raa.freeblks); + agf->agf_longest = cpu_to_be32(raa.longest); + + /* Update the AGF counters from the cntbt. */ + cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno, + XFS_BTNUM_CNT); + error = xfs_btree_count_blocks(cur, &blocks); + if (error) + goto err; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + btreeblks += blocks - 1; + + /* Update the AGF counters from the rmapbt. */ + cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno); + error = xfs_btree_count_blocks(cur, &blocks); + if (error) + goto err; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + agf->agf_rmap_blocks = cpu_to_be32(blocks); + btreeblks += blocks - 1; + + agf->agf_btreeblks = cpu_to_be32(btreeblks); + + /* Update the AGF counters from the refcountbt. */ + if (xfs_sb_version_hasreflink(&mp->m_sb)) { + cur = xfs_refcountbt_init_cursor(mp, sc->tp, agf_bp, + sc->sa.agno, NULL); + error = xfs_btree_count_blocks(cur, &blocks); + if (error) + goto err; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + agf->agf_refcount_blocks = cpu_to_be32(blocks); + } + + return 0; +err: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; +} + +/* Trigger reinitialization of the in-core data. */ +STATIC int +xfs_repair_agf_reinit_incore( + struct xfs_scrub_context *sc, + struct xfs_agf *agf, + const struct xfs_agf *old_agf) +{ + struct xfs_perag *pag; + + /* XXX: trigger fdblocks recalculation */ + + /* Now reinitialize the in-core counters if necessary. */ + pag = sc->sa.pag; + if (!pag->pagf_init) + return 0; + + pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); + pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); + pag->pagf_longest = be32_to_cpu(agf->agf_longest); + pag->pagf_levels[XFS_BTNUM_BNOi] = + be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); + pag->pagf_levels[XFS_BTNUM_CNTi] = + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); + pag->pagf_levels[XFS_BTNUM_RMAPi] = + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]); + pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); + + return 0; +} + +/* Repair the AGF. */ +int +xfs_repair_agf( + struct xfs_scrub_context *sc) +{ + struct xfs_repair_find_ag_btree fab[REPAIR_AGF_MAX]; + struct xfs_agf old_agf; + struct xfs_mount *mp = sc->mp; + struct xfs_buf *agf_bp; + struct xfs_buf *agfl_bp; + struct xfs_agf *agf; + int error; + + /* We require the rmapbt to rebuild anything. */ + if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + return -EOPNOTSUPP; + + xfs_scrub_perag_get(sc->mp, &sc->sa); + error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, + XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGF_DADDR(mp)), + XFS_FSS_TO_BB(mp, 1), 0, &agf_bp, NULL); + if (error) + return error; + agf_bp->b_ops = &xfs_agf_buf_ops; + agf = XFS_BUF_TO_AGF(agf_bp); + + /* + * Load the AGFL so that we can screen out OWN_AG blocks that are on + * the AGFL now; these blocks might have once been part of the + * bno/cnt/rmap btrees but are not now. This is a chicken and egg + * problem: the AGF is corrupt, so we have to trust the AGFL contents + * because we can't do any serious cross-referencing with any of the + * btrees rooted in the AGF. If the AGFL contents are obviously bad + * then we'll bail out. 
+ */ + error = xfs_alloc_read_agfl(mp, sc->tp, sc->sa.agno, &agfl_bp); + if (error) + return error; + + /* + * Spot-check the AGFL blocks; if they're obviously corrupt then + * there's nothing we can do but bail out. + */ + error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(agf_bp), agfl_bp, + xfs_repair_agf_check_agfl_block, sc); + if (error) + return error; + + /* + * Find the AGF btree roots. See the comment for this function for + * more information about the limitations of this repairer; this is + * also a chicken-and-egg situation. + */ + error = xfs_repair_agf_find_btrees(sc, agf_bp, fab, agfl_bp); + if (error) + return error; + + /* Start rewriting the header and implant the btrees we found. */ + xfs_repair_agf_init_header(sc, agf_bp, &old_agf); + xfs_repair_agf_set_roots(sc, agf, fab); + error = xfs_repair_agf_update_btree_counters(sc, agf_bp); + if (error) + goto out_revert; + + /* Reinitialize in-core state. */ + error = xfs_repair_agf_reinit_incore(sc, agf, &old_agf); + if (error) + goto out_revert; + + /* Write this to disk. */ + xfs_trans_buf_set_type(sc->tp, agf_bp, XFS_BLFT_AGF_BUF); + xfs_trans_log_buf(sc->tp, agf_bp, 0, BBTOB(agf_bp->b_length) - 1); + return 0; + +out_revert: + memcpy(agf, &old_agf, sizeof(old_agf)); + return error; +} + +/* AGFL */ + +struct xfs_repair_agfl { + struct xfs_repair_extent_list agmeta_list; + struct xfs_repair_extent_list *freesp_list; + struct xfs_scrub_context *sc; +}; + +/* Record all freespace information. */ +STATIC int +xfs_repair_agfl_rmap_fn( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) +{ + struct xfs_repair_agfl *ra = priv; + xfs_fsblock_t fsb; + int error = 0; + + if (xfs_scrub_should_terminate(ra->sc, &error)) + return error; + + /* Record all the OWN_AG blocks. */ + if (rec->rm_owner == XFS_RMAP_OWN_AG) { + fsb = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, + rec->rm_startblock); + error = xfs_repair_collect_btree_extent(ra->sc, + ra->freesp_list, fsb, rec->rm_blockcount); + if (error) + return error; + } + + return xfs_repair_collect_btree_cur_blocks(ra->sc, cur, + xfs_repair_collect_btree_cur_blocks_in_extent_list, + &ra->agmeta_list); +} + +/* Add a btree block to the agmeta list. */ +STATIC int +xfs_repair_agfl_visit_btblock( + struct xfs_btree_cur *cur, + int level, + void *priv) +{ + struct xfs_repair_agfl *ra = priv; + struct xfs_buf *bp; + xfs_fsblock_t fsb; + int error = 0; + + if (xfs_scrub_should_terminate(ra->sc, &error)) + return error; + + xfs_btree_get_block(cur, level, &bp); + if (!bp) + return 0; + + fsb = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn); + return xfs_repair_collect_btree_extent(ra->sc, &ra->agmeta_list, + fsb, 1); +} + +/* + * Map out all the non-AGFL OWN_AG space in this AG so that we can deduce + * which blocks belong to the AGFL. + */ +STATIC int +xfs_repair_agfl_find_extents( + struct xfs_scrub_context *sc, + struct xfs_buf *agf_bp, + struct xfs_repair_extent_list *agfl_extents, + xfs_agblock_t *flcount) +{ + struct xfs_repair_agfl ra; + struct xfs_mount *mp = sc->mp; + struct xfs_btree_cur *cur; + struct xfs_repair_extent *rae; + int error; + + ra.sc = sc; + ra.freesp_list = agfl_extents; + xfs_repair_init_extent_list(&ra.agmeta_list); + + /* Find all space used by the free space btrees & rmapbt. */ + cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno); + error = xfs_rmap_query_all(cur, xfs_repair_agfl_rmap_fn, &ra); + if (error) + goto err; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + + /* Find all space used by bnobt. 
*/ + cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno, + XFS_BTNUM_BNO); + error = xfs_btree_visit_blocks(cur, xfs_repair_agfl_visit_btblock, &ra); + if (error) + goto err; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + + /* Find all space used by cntbt. */ + cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno, + XFS_BTNUM_CNT); + error = xfs_btree_visit_blocks(cur, xfs_repair_agfl_visit_btblock, &ra); + if (error) + goto err; + + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + + /* + * Drop the freesp meta blocks that are in use by btrees. + * The remaining blocks /should/ be AGFL blocks. + */ + error = xfs_repair_subtract_extents(sc, agfl_extents, &ra.agmeta_list); + xfs_repair_cancel_btree_extents(sc, &ra.agmeta_list); + if (error) + return error; + + /* Calculate the new AGFL size. */ + *flcount = 0; + for_each_xfs_repair_extent(rae, agfl_extents) { + *flcount += rae->len; + if (*flcount > xfs_agfl_size(mp)) + break; + } + if (*flcount > xfs_agfl_size(mp)) + *flcount = xfs_agfl_size(mp); + return 0; + +err: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; +} + +/* Update the AGF and reset the in-core state. */ +STATIC int +xfs_repair_agfl_update_agf( + struct xfs_scrub_context *sc, + struct xfs_buf *agf_bp, + xfs_agblock_t flcount) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(agf_bp); + + /* XXX: trigger fdblocks recalculation */ + + /* Update the AGF counters. */ + if (sc->sa.pag->pagf_init) + sc->sa.pag->pagf_flcount = flcount; + agf->agf_flfirst = cpu_to_be32(0); + agf->agf_flcount = cpu_to_be32(flcount); + agf->agf_fllast = cpu_to_be32(flcount - 1); + + xfs_alloc_log_agf(sc->tp, agf_bp, + XFS_AGF_FLFIRST | XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); + return 0; +} + +/* Write out a totally new AGFL. */ +STATIC void +xfs_repair_agfl_init_header( + struct xfs_scrub_context *sc, + struct xfs_buf *agfl_bp, + struct xfs_repair_extent_list *agfl_extents, + xfs_agblock_t flcount) +{ + struct xfs_mount *mp = sc->mp; + __be32 *agfl_bno; + struct xfs_repair_extent *rae; + struct xfs_repair_extent *n; + struct xfs_agfl *agfl; + xfs_agblock_t agbno; + unsigned int fl_off; + + /* Start rewriting the header. */ + agfl = XFS_BUF_TO_AGFL(agfl_bp); + memset(agfl, 0xFF, BBTOB(agfl_bp->b_length)); + agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC); + agfl->agfl_seqno = cpu_to_be32(sc->sa.agno); + uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid); + + /* Fill the AGFL with the remaining blocks. */ + fl_off = 0; + agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agfl_bp); + for_each_xfs_repair_extent_safe(rae, n, agfl_extents) { + agbno = XFS_FSB_TO_AGBNO(mp, rae->fsbno); + + trace_xfs_repair_agfl_insert(mp, sc->sa.agno, agbno, rae->len); + + while (rae->len > 0 && fl_off < flcount) { + agfl_bno[fl_off] = cpu_to_be32(agbno); + fl_off++; + agbno++; + rae->fsbno++; + rae->len--; + } + + if (rae->len) + break; + list_del(&rae->list); + kmem_free(rae); + } + + /* Write AGF and AGFL to disk. */ + xfs_trans_buf_set_type(sc->tp, agfl_bp, XFS_BLFT_AGFL_BUF); + xfs_trans_log_buf(sc->tp, agfl_bp, 0, BBTOB(agfl_bp->b_length) - 1); +} + +/* Repair the AGFL. */ +int +xfs_repair_agfl( + struct xfs_scrub_context *sc) +{ + struct xfs_owner_info oinfo; + struct xfs_repair_extent_list agfl_extents; + struct xfs_mount *mp = sc->mp; + struct xfs_buf *agf_bp; + struct xfs_buf *agfl_bp; + xfs_agblock_t flcount; + int error; + + /* We require the rmapbt to rebuild anything. 
*/ + if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + return -EOPNOTSUPP; + + xfs_scrub_perag_get(sc->mp, &sc->sa); + xfs_repair_init_extent_list(&agfl_extents); + + /* + * Read the AGF so that we can query the rmapbt. We hope that there's + * nothing wrong with the AGF, but all the AG header repair functions + * have this chicken-and-egg problem. + */ + error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp); + if (error) + return error; + if (!agf_bp) + return -ENOMEM; + + error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, + XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGFL_DADDR(mp)), + XFS_FSS_TO_BB(mp, 1), 0, &agfl_bp, NULL); + if (error) + return error; + agfl_bp->b_ops = &xfs_agfl_buf_ops; + + /* + * Compute the set of old AGFL blocks by subtracting from the list of + * OWN_AG blocks the list of blocks owned by all other OWN_AG metadata + * (bnobt, cntbt, rmapbt). These are the old AGFL blocks, so return + * that list and the number of blocks we're actually going to put back + * on the AGFL. + */ + error = xfs_repair_agfl_find_extents(sc, agf_bp, &agfl_extents, + &flcount); + if (error) + goto err; + + /* + * Update AGF and AGFL. We reset the global free block counter when + * we adjust the AGF flcount (which can fail) so avoid updating any + * bufers until we know that part works. + */ + error = xfs_repair_agfl_update_agf(sc, agf_bp, flcount); + if (error) + goto err; + xfs_repair_agfl_init_header(sc, agfl_bp, &agfl_extents, flcount); + + /* + * Ok, the AGFL should be ready to go now. Roll the transaction so + * that we can free any AGFL overflow. + */ + sc->sa.agf_bp = agf_bp; + sc->sa.agfl_bp = agfl_bp; + error = xfs_repair_roll_ag_trans(sc); + if (error) + goto err; + + /* Dump any AGFL overflow. */ + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); + return xfs_repair_reap_btree_extents(sc, &agfl_extents, &oinfo, + XFS_AG_RESV_AGFL); +err: + xfs_repair_cancel_btree_extents(sc, &agfl_extents); + return error; +} diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 326be4e8b71e..bcdaa8df18f6 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -127,9 +127,12 @@ xfs_repair_roll_ag_trans( int error; /* Keep the AG header buffers locked so we can keep going. */ - xfs_trans_bhold(sc->tp, sc->sa.agi_bp); - xfs_trans_bhold(sc->tp, sc->sa.agf_bp); - xfs_trans_bhold(sc->tp, sc->sa.agfl_bp); + if (sc->sa.agi_bp) + xfs_trans_bhold(sc->tp, sc->sa.agi_bp); + if (sc->sa.agf_bp) + xfs_trans_bhold(sc->tp, sc->sa.agf_bp); + if (sc->sa.agfl_bp) + xfs_trans_bhold(sc->tp, sc->sa.agfl_bp); /* Roll the transaction. */ error = xfs_trans_roll(&sc->tp); @@ -137,9 +140,12 @@ xfs_repair_roll_ag_trans( goto out_release; /* Join AG headers to the new transaction. */ - xfs_trans_bjoin(sc->tp, sc->sa.agi_bp); - xfs_trans_bjoin(sc->tp, sc->sa.agf_bp); - xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp); + if (sc->sa.agi_bp) + xfs_trans_bjoin(sc->tp, sc->sa.agi_bp); + if (sc->sa.agf_bp) + xfs_trans_bjoin(sc->tp, sc->sa.agf_bp); + if (sc->sa.agfl_bp) + xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp); return 0; @@ -149,9 +155,12 @@ xfs_repair_roll_ag_trans( * buffers will be released during teardown on our way out * of the kernel. 
*/ - xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp); - xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp); - xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp); + if (sc->sa.agi_bp) + xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp); + if (sc->sa.agf_bp) + xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp); + if (sc->sa.agfl_bp) + xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp); return error; } @@ -408,6 +417,85 @@ xfs_repair_collect_btree_extent( return 0; } +/* + * Help record all btree blocks seen while iterating all records of a btree. + * + * We know that the btree query_all function starts at the left edge and walks + * towards the right edge of the tree. Therefore, we know that we can walk up + * the btree cursor towards the root; if the pointer for a given level points + * to the first record/key in that block, we haven't seen this block before; + * and therefore we need to remember that we saw this block in the btree. + * + * So if our btree is: + * + * 4 + * / | \ + * 1 2 3 + * + * Pretend for this example that each leaf block has 100 btree records. For + * the first btree record, we'll observe that bc_ptrs[0] == 1, so we record + * that we saw block 1. Then we observe that bc_ptrs[1] == 1, so we record + * block 4. The list is [1, 4]. + * + * For the second btree record, we see that bc_ptrs[0] == 2, so we exit the + * loop. The list remains [1, 4]. + * + * For the 101st btree record, we've moved onto leaf block 2. Now + * bc_ptrs[0] == 1 again, so we record that we saw block 2. We see that + * bc_ptrs[1] == 2, so we exit the loop. The list is now [1, 4, 2]. + * + * For the 102nd record, bc_ptrs[0] == 2, so we continue. + * + * For the 201st record, we've moved on to leaf block 3. bc_ptrs[0] == 1, so + * we add 3 to the list. Now it is [1, 4, 2, 3]. + * + * For the 300th record we just exit, with the list being [1, 4, 2, 3]. + * + * The *iter_fn can return XFS_BTREE_QUERY_RANGE_ABORT to stop, 0 to keep + * iterating, or the usual negative error code. + */ +int +xfs_repair_collect_btree_cur_blocks( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + int (*iter_fn)(struct xfs_scrub_context *sc, + xfs_fsblock_t fsbno, + xfs_fsblock_t len, + void *priv), + void *priv) +{ + struct xfs_buf *bp; + xfs_fsblock_t fsb; + int i; + int error; + + for (i = 0; i < cur->bc_nlevels && cur->bc_ptrs[i] == 1; i++) { + xfs_btree_get_block(cur, i, &bp); + if (!bp) + continue; + fsb = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn); + error = iter_fn(sc, fsb, 1, priv); + if (error) + return error; + } + + return 0; +} + +/* + * Simple adapter to connect xfs_repair_collect_btree_extent to + * xfs_repair_collect_btree_cur_blocks. + */ +int +xfs_repair_collect_btree_cur_blocks_in_extent_list( + struct xfs_scrub_context *sc, + xfs_fsblock_t fsbno, + xfs_fsblock_t len, + void *priv) +{ + return xfs_repair_collect_btree_extent(sc, priv, fsbno, len); +} + /* * An error happened during the rebuild so the transaction will be cancelled. * The fs will shut down, and the administrator has to unmount and run repair. 
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index ef47826b6725..f2af5923aa75 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -48,9 +48,20 @@ xfs_repair_init_extent_list(
 
 #define for_each_xfs_repair_extent_safe(rbe, n, exlist) \
 	list_for_each_entry_safe((rbe), (n), &(exlist)->list, list)
+#define for_each_xfs_repair_extent(rbe, exlist) \
+	list_for_each_entry((rbe), &(exlist)->list, list)
 int xfs_repair_collect_btree_extent(struct xfs_scrub_context *sc,
 		struct xfs_repair_extent_list *btlist, xfs_fsblock_t fsbno,
 		xfs_extlen_t len);
+int xfs_repair_collect_btree_cur_blocks(struct xfs_scrub_context *sc,
+		struct xfs_btree_cur *cur,
+		int (*iter_fn)(struct xfs_scrub_context *sc,
+			       xfs_fsblock_t fsbno, xfs_fsblock_t len,
+			       void *priv),
+		void *priv);
+int xfs_repair_collect_btree_cur_blocks_in_extent_list(
+		struct xfs_scrub_context *sc, xfs_fsblock_t fsbno,
+		xfs_fsblock_t len, void *priv);
 void xfs_repair_cancel_btree_extents(struct xfs_scrub_context *sc,
 		struct xfs_repair_extent_list *btlist);
 int xfs_repair_subtract_extents(struct xfs_scrub_context *sc,
@@ -89,6 +100,8 @@ int xfs_repair_ino_dqattach(struct xfs_scrub_context *sc);
 
 int xfs_repair_probe(struct xfs_scrub_context *sc);
 int xfs_repair_superblock(struct xfs_scrub_context *sc);
+int xfs_repair_agf(struct xfs_scrub_context *sc);
+int xfs_repair_agfl(struct xfs_scrub_context *sc);
 
 #else
 
@@ -112,6 +125,8 @@ xfs_repair_calc_ag_resblks(
 
 #define xfs_repair_probe		xfs_repair_notsupported
 #define xfs_repair_superblock		xfs_repair_notsupported
+#define xfs_repair_agf			xfs_repair_notsupported
+#define xfs_repair_agfl			xfs_repair_notsupported
 
 #endif	/* CONFIG_XFS_ONLINE_REPAIR */
 
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 58ae76b3a421..8e11c3c699fb 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -208,13 +208,13 @@ static const struct xfs_scrub_meta_ops meta_scrub_ops[] = {
 		.type	= ST_PERAG,
 		.setup	= xfs_scrub_setup_fs,
 		.scrub	= xfs_scrub_agf,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xfs_repair_agf,
 	},
 	[XFS_SCRUB_TYPE_AGFL]= {	/* agfl */
 		.type	= ST_PERAG,
 		.setup	= xfs_scrub_setup_fs,
 		.scrub	= xfs_scrub_agfl,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xfs_repair_agfl,
 	},
 	[XFS_SCRUB_TYPE_AGI] = {	/* agi */
 		.type	= ST_PERAG,
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 524f543c5b82..c08785cf83a9 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -126,6 +126,60 @@ xfs_trans_dup(
 	return ntp;
 }
 
+/*
+ * Try to reserve more blocks for a transaction. The single use case we
+ * support is for online repair -- use a transaction to gather data without
+ * fear of btree cycle deadlocks; calculate how many blocks we really need
+ * from that data; and only then start modifying data. This can fail due to
+ * ENOSPC, so we have to be able to cancel the transaction.
+ */
+int
+xfs_trans_reserve_more(
+	struct xfs_trans	*tp,
+	uint			blocks,
+	uint			rtextents)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	bool			rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+	int			error = 0;
+
+	ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
+
+	/*
+	 * Attempt to reserve the needed disk blocks by decrementing
+	 * the number needed from the number available. This will
+	 * fail if the count would go below zero.
+	 */
+	if (blocks > 0) {
+		error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd);
+		if (error)
+			return -ENOSPC;
+		tp->t_blk_res += blocks;
+	}
+
+	/*
+	 * Attempt to reserve the needed realtime extents by decrementing
+	 * the number needed from the number available. This will
+	 * fail if the count would go below zero.
+	 */
+	if (rtextents > 0) {
+		error = xfs_mod_frextents(mp, -((int64_t)rtextents));
+		if (error) {
+			error = -ENOSPC;
+			goto out_blocks;
+		}
+		tp->t_rtx_res += rtextents;
+	}
+
+	return 0;
+out_blocks:
+	if (blocks > 0) {
+		xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);
+		tp->t_blk_res -= blocks;
+	}
+	return error;
+}
+
 /*
  * This is called to reserve free disk blocks and log space for the
  * given transaction. This must be done before allocating any resources
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 6526314f0b8f..bdbd3d5fd7b0 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -153,6 +153,8 @@ typedef struct xfs_trans {
 int		xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp,
 			uint blocks, uint rtextents, uint flags,
 			struct xfs_trans **tpp);
+int		xfs_trans_reserve_more(struct xfs_trans *tp, uint blocks,
+			uint rtextents);
 int		xfs_trans_alloc_empty(struct xfs_mount *mp,
 			struct xfs_trans **tpp);
 void		xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);

--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
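
For context, a rough sketch of how a repair function might call the new
xfs_trans_reserve_more() helper; the function name and the resblks parameter
below are hypothetical and only illustrate the gather-first, reserve-later
flow described in the helper's comment, not code from this patch:

/*
 * Hypothetical caller (illustration only): metadata has already been
 * gathered under sc->tp with no reservation, and resblks is the number of
 * blocks the rebuild has been computed to need.  Nothing is dirty yet, so
 * an ENOSPC here can still be handled by cancelling the transaction.
 */
STATIC int
xfs_repair_example_rebuild(
	struct xfs_scrub_context	*sc,
	uint				resblks)
{
	int				error;

	/* Grow the reservation now that we know how much we need. */
	error = xfs_trans_reserve_more(sc->tp, resblks, 0);
	if (error)
		return error;

	/* ...start modifying metadata with the enlarged reservation... */
	return 0;
}

The ASSERT in xfs_trans_reserve_more() enforces that the transaction is
still clean at this point, which is what makes backing out on ENOSPC safe.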