[PATCH 064/145] xfs_repair: rebuild reverse-mapping btree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Rebuild the reverse-mapping btree with the rmap observations
corresponding to file extents, bmbt blocks, and fixed per-AG metadata.

v2: Update to use the flags in rm_flags and to support high key
updates for the rmapbt.

v3: Initialize rm_flags to zero so as to avoid corruption problems
later when we stash non-key flags in the offset field.

v4: Leave a few empty slots in each rmapbt leaf when we're rebuilding
the rmapbt so that we can insert records for the AG metadata blocks
without causing too many btree splits.  This (hopefully) prevents the
situation where running xfs_repair greatly increases the size of the
btree.

Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 repair/phase5.c |  407 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 393 insertions(+), 14 deletions(-)


diff --git a/repair/phase5.c b/repair/phase5.c
index b58111b..bb065ec 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -28,6 +28,8 @@
 #include "versions.h"
 #include "threads.h"
 #include "progress.h"
+#include "slab.h"
+#include "rmap.h"
 
 /*
  * we maintain the current slice (path from root to leaf)
@@ -1326,6 +1328,359 @@ nextrec:
 	}
 }
 
+/* rebuild the rmap tree */
+
+/*
+ * we don't have to worry here about how chewing up free extents
+ * may perturb things because rmap tree building happens before
+ * freespace tree building.
+ */
+static void
+init_rmapbt_cursor(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct bt_status	*btree_curs)
+{
+	size_t			num_recs;
+	int			level;
+	struct bt_stat_level	*lptr;
+	struct bt_stat_level	*p_lptr;
+	xfs_extlen_t		blocks_allocated;
+	int			maxrecs;
+
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+		memset(btree_curs, 0, sizeof(struct bt_status));
+		return;
+	}
+
+	lptr = &btree_curs->level[0];
+	btree_curs->init = 1;
+
+	/*
+	 * build up statistics
+	 */
+	num_recs = rmap_record_count(mp, agno);
+	if (num_recs == 0) {
+		/*
+		 * easy corner-case -- no rmap records
+		 */
+		lptr->num_blocks = 1;
+		lptr->modulo = 0;
+		lptr->num_recs_pb = 0;
+		lptr->num_recs_tot = 0;
+
+		btree_curs->num_levels = 1;
+		btree_curs->num_tot_blocks = btree_curs->num_free_blocks = 1;
+
+		setup_cursor(mp, agno, btree_curs);
+
+		return;
+	}
+
+	/*
+	 * Leave enough slack in the rmapbt that we can insert the
+	 * metadata AG entries without too many splits.
+	 */
+	maxrecs = mp->m_rmap_mxr[0];
+	if (num_recs > maxrecs)
+		maxrecs -= 10;
+	blocks_allocated = lptr->num_blocks = howmany(num_recs, maxrecs);
+
+	lptr->modulo = num_recs % lptr->num_blocks;
+	lptr->num_recs_pb = num_recs / lptr->num_blocks;
+	lptr->num_recs_tot = num_recs;
+	level = 1;
+
+	if (lptr->num_blocks > 1)  {
+		for (; btree_curs->level[level-1].num_blocks > 1
+				&& level < XFS_BTREE_MAXLEVELS;
+				level++)  {
+			lptr = &btree_curs->level[level];
+			p_lptr = &btree_curs->level[level - 1];
+			lptr->num_blocks = howmany(p_lptr->num_blocks,
+				mp->m_rmap_mxr[1]);
+			lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
+			lptr->num_recs_pb = p_lptr->num_blocks
+					/ lptr->num_blocks;
+			lptr->num_recs_tot = p_lptr->num_blocks;
+
+			blocks_allocated += lptr->num_blocks;
+		}
+	}
+	ASSERT(lptr->num_blocks == 1);
+	btree_curs->num_levels = level;
+
+	btree_curs->num_tot_blocks = btree_curs->num_free_blocks
+			= blocks_allocated;
+
+	setup_cursor(mp, agno, btree_curs);
+}
+
+static void
+prop_rmap_cursor(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct bt_status	*btree_curs,
+	struct xfs_rmap_irec	*rm_rec,
+	int			level)
+{
+	struct xfs_btree_block	*bt_hdr;
+	struct xfs_rmap_key	*bt_key;
+	xfs_rmap_ptr_t		*bt_ptr;
+	xfs_agblock_t		agbno;
+	struct bt_stat_level	*lptr;
+
+	level++;
+
+	if (level >= btree_curs->num_levels)
+		return;
+
+	lptr = &btree_curs->level[level];
+	bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
+
+	if (be16_to_cpu(bt_hdr->bb_numrecs) == 0)  {
+		/*
+		 * this only happens once to initialize the
+		 * first path up the left side of the tree
+		 * where the agbno's are already set up
+		 */
+		prop_rmap_cursor(mp, agno, btree_curs, rm_rec, level);
+	}
+
+	if (be16_to_cpu(bt_hdr->bb_numrecs) ==
+				lptr->num_recs_pb + (lptr->modulo > 0))  {
+		/*
+		 * write out current prev block, grab us a new block,
+		 * and set the rightsib pointer of current block
+		 */
+#ifdef XR_BLD_INO_TRACE
+		fprintf(stderr, " ino prop agbno %d ", lptr->prev_agbno);
+#endif
+		if (lptr->prev_agbno != NULLAGBLOCK)  {
+			ASSERT(lptr->prev_buf_p != NULL);
+			libxfs_writebuf(lptr->prev_buf_p, 0);
+		}
+		lptr->prev_agbno = lptr->agbno;
+		lptr->prev_buf_p = lptr->buf_p;
+		agbno = get_next_blockaddr(agno, level, btree_curs);
+
+		bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);
+
+		lptr->buf_p = libxfs_getbuf(mp->m_dev,
+					XFS_AGB_TO_DADDR(mp, agno, agbno),
+					XFS_FSB_TO_BB(mp, 1));
+		lptr->agbno = agbno;
+
+		if (lptr->modulo)
+			lptr->modulo--;
+
+		/*
+		 * initialize block header
+		 */
+		lptr->buf_p->b_ops = &xfs_rmapbt_buf_ops;
+		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
+		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
+		xfs_btree_init_block(mp, lptr->buf_p, XFS_RMAP_CRC_MAGIC,
+					level, 0, agno,
+					XFS_BTREE_CRC_BLOCKS);
+
+		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
+
+		/*
+		 * propagate extent record for first extent in new block up
+		 */
+		prop_rmap_cursor(mp, agno, btree_curs, rm_rec, level);
+	}
+	/*
+	 * add inode info to current block
+	 */
+	be16_add_cpu(&bt_hdr->bb_numrecs, 1);
+
+	bt_key = XFS_RMAP_KEY_ADDR(bt_hdr,
+				    be16_to_cpu(bt_hdr->bb_numrecs));
+	bt_ptr = XFS_RMAP_PTR_ADDR(bt_hdr,
+				    be16_to_cpu(bt_hdr->bb_numrecs),
+				    mp->m_rmap_mxr[1]);
+
+	bt_key->rm_startblock = cpu_to_be32(rm_rec->rm_startblock);
+	bt_key->rm_owner = cpu_to_be64(rm_rec->rm_owner);
+	bt_key->rm_offset = cpu_to_be64(rm_rec->rm_offset);
+
+	*bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
+}
+
+static void
+prop_rmap_highkey(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct bt_status	*btree_curs,
+	struct xfs_rmap_irec	*rm_highkey)
+{
+	struct xfs_btree_block	*bt_hdr;
+	struct xfs_rmap_key	*bt_key;
+	struct bt_stat_level	*lptr;
+	struct xfs_rmap_irec	key;
+	struct xfs_rmap_irec	high_key;
+	int			level;
+	int			i;
+	int			numrecs;
+
+	key.rm_flags = 0;
+	high_key = *rm_highkey;
+	for (level = 1; level < btree_curs->num_levels; level++) {
+		lptr = &btree_curs->level[level];
+		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
+		numrecs = be16_to_cpu(bt_hdr->bb_numrecs);
+		bt_key = XFS_RMAP_HIGH_KEY_ADDR(bt_hdr, numrecs);
+
+		bt_key->rm_startblock = cpu_to_be32(high_key.rm_startblock);
+		bt_key->rm_owner = cpu_to_be64(high_key.rm_owner);
+		bt_key->rm_offset = cpu_to_be64(
+				xfs_rmap_irec_offset_pack(&high_key));
+
+		for (i = 1; i < numrecs - 1; i++) {
+			bt_key = XFS_RMAP_HIGH_KEY_ADDR(bt_hdr, i);
+			key.rm_startblock = be32_to_cpu(bt_key->rm_startblock);
+			key.rm_owner = be64_to_cpu(bt_key->rm_owner);
+			key.rm_offset = be64_to_cpu(bt_key->rm_offset);
+			if (rmap_diffkeys(&high_key, &key) > 0)
+				high_key = key;
+		}
+	}
+}
+
+/*
+ * rebuilds a rmap btree given a cursor.
+ */
+static void
+build_rmap_tree(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct bt_status	*btree_curs)
+{
+	xfs_agnumber_t		i;
+	xfs_agblock_t		j;
+	xfs_agblock_t		agbno;
+	struct xfs_btree_block	*bt_hdr;
+	struct xfs_rmap_irec	*rm_rec;
+	struct xfs_slab_cursor	*rmap_cur;
+	struct xfs_rmap_rec	*bt_rec;
+	struct xfs_rmap_irec	highest_key;
+	struct xfs_rmap_irec	hi_key;
+	struct bt_stat_level	*lptr;
+	int			level = btree_curs->num_levels;
+	int			error;
+
+	highest_key.rm_flags = 0;
+	for (i = 0; i < level; i++)  {
+		lptr = &btree_curs->level[i];
+
+		agbno = get_next_blockaddr(agno, i, btree_curs);
+		lptr->buf_p = libxfs_getbuf(mp->m_dev,
+					XFS_AGB_TO_DADDR(mp, agno, agbno),
+					XFS_FSB_TO_BB(mp, 1));
+
+		if (i == btree_curs->num_levels - 1)
+			btree_curs->root = agbno;
+
+		lptr->agbno = agbno;
+		lptr->prev_agbno = NULLAGBLOCK;
+		lptr->prev_buf_p = NULL;
+		/*
+		 * initialize block header
+		 */
+
+		lptr->buf_p->b_ops = &xfs_rmapbt_buf_ops;
+		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
+		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
+		xfs_btree_init_block(mp, lptr->buf_p, XFS_RMAP_CRC_MAGIC,
+					i, 0, agno,
+					XFS_BTREE_CRC_BLOCKS);
+	}
+
+	/*
+	 * run along leaf, setting up records.  as we have to switch
+	 * blocks, call the prop_rmap_cursor routine to set up the new
+	 * pointers for the parent.  that can recurse up to the root
+	 * if required.  set the sibling pointers for leaf level here.
+	 */
+	error = init_rmap_cursor(agno, &rmap_cur);
+	if (error)
+		do_error(
+_("Insufficient memory to construct reverse-map cursor."));
+	rm_rec = pop_slab_cursor(rmap_cur);
+	lptr = &btree_curs->level[0];
+
+	for (i = 0; i < lptr->num_blocks; i++)  {
+		/*
+		 * block initialization, lay in block header
+		 */
+		lptr->buf_p->b_ops = &xfs_rmapbt_buf_ops;
+		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
+		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
+		xfs_btree_init_block(mp, lptr->buf_p, XFS_RMAP_CRC_MAGIC,
+					0, 0, agno,
+					XFS_BTREE_CRC_BLOCKS);
+
+		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
+		bt_hdr->bb_numrecs = cpu_to_be16(lptr->num_recs_pb +
+							(lptr->modulo > 0));
+
+		if (lptr->modulo > 0)
+			lptr->modulo--;
+
+		if (lptr->num_recs_pb > 0)
+			prop_rmap_cursor(mp, agno, btree_curs, rm_rec, 0);
+
+		bt_rec = (struct xfs_rmap_rec *)
+			  ((char *)bt_hdr + XFS_RMAP_BLOCK_LEN);
+		highest_key.rm_startblock = 0;
+		highest_key.rm_owner = 0;
+		highest_key.rm_offset = 0;
+		for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
+			ASSERT(rm_rec != NULL);
+			bt_rec[j].rm_startblock =
+					cpu_to_be32(rm_rec->rm_startblock);
+			bt_rec[j].rm_blockcount =
+					cpu_to_be32(rm_rec->rm_blockcount);
+			bt_rec[j].rm_owner = cpu_to_be64(rm_rec->rm_owner);
+			bt_rec[j].rm_offset = cpu_to_be64(
+					xfs_rmap_irec_offset_pack(rm_rec));
+			rmap_high_key_from_rec(rm_rec, &hi_key);
+			if (rmap_diffkeys(&highest_key, &hi_key) > 0)
+				highest_key = hi_key;
+
+			rm_rec = pop_slab_cursor(rmap_cur);
+		}
+
+		/* Now go set the parent key */
+		prop_rmap_highkey(mp, agno, btree_curs, &highest_key);
+
+		if (rm_rec != NULL)  {
+			/*
+			 * get next leaf level block
+			 */
+			if (lptr->prev_buf_p != NULL)  {
+#ifdef XR_BLD_RL_TRACE
+				fprintf(stderr, "writing rmapbt agbno %u\n",
+					lptr->prev_agbno);
+#endif
+				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
+				libxfs_writebuf(lptr->prev_buf_p, 0);
+			}
+			lptr->prev_buf_p = lptr->buf_p;
+			lptr->prev_agbno = lptr->agbno;
+			lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
+			bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);
+
+			lptr->buf_p = libxfs_getbuf(mp->m_dev,
+					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
+					XFS_FSB_TO_BB(mp, 1));
+		}
+	}
+	free_slab_cursor(&rmap_cur);
+}
+
 /*
  * build both the agf and the agfl for an agno given both
  * btree cursors.
@@ -1333,19 +1688,21 @@ nextrec:
  * XXX: yet more common code that can be shared with mkfs/growfs.
  */
 static void
-build_agf_agfl(xfs_mount_t	*mp,
-		xfs_agnumber_t	agno,
-		bt_status_t	*bno_bt,
-		bt_status_t	*bcnt_bt,
-		xfs_extlen_t	freeblks,	/* # free blocks in tree */
-		int		lostblocks)	/* # blocks that will be lost */
+build_agf_agfl(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct bt_status	*bno_bt,
+	struct bt_status	*bcnt_bt,
+	xfs_extlen_t		freeblks,	/* # free blocks in tree */
+	int			lostblocks,	/* # blocks that will be lost */
+	struct bt_status	*rmap_bt)
 {
-	extent_tree_node_t	*ext_ptr;
-	xfs_buf_t		*agf_buf, *agfl_buf;
+	struct extent_tree_node	*ext_ptr;
+	struct xfs_buf		*agf_buf, *agfl_buf;
 	int			i;
 	int			j;
-	xfs_agfl_t		*agfl;
-	xfs_agf_t		*agf;
+	struct xfs_agfl		*agfl;
+	struct xfs_agf		*agf;
 	__be32			*freelist;
 
 	agf_buf = libxfs_getbuf(mp->m_dev,
@@ -1377,20 +1734,25 @@ build_agf_agfl(xfs_mount_t	*mp,
 	agf->agf_levels[XFS_BTNUM_BNO] = cpu_to_be32(bno_bt->num_levels);
 	agf->agf_roots[XFS_BTNUM_CNT] = cpu_to_be32(bcnt_bt->root);
 	agf->agf_levels[XFS_BTNUM_CNT] = cpu_to_be32(bcnt_bt->num_levels);
+	agf->agf_roots[XFS_BTNUM_RMAP] = cpu_to_be32(rmap_bt->root);
+	agf->agf_levels[XFS_BTNUM_RMAP] = cpu_to_be32(rmap_bt->num_levels);
 	agf->agf_freeblks = cpu_to_be32(freeblks);
 
 	/*
 	 * Count and record the number of btree blocks consumed if required.
 	 */
 	if (xfs_sb_version_haslazysbcount(&mp->m_sb)) {
+		unsigned int blks;
 		/*
 		 * Don't count the root blocks as they are already
 		 * accounted for.
 		 */
-		agf->agf_btreeblks = cpu_to_be32(
-			(bno_bt->num_tot_blocks - bno_bt->num_free_blocks) +
+		blks = (bno_bt->num_tot_blocks - bno_bt->num_free_blocks) +
 			(bcnt_bt->num_tot_blocks - bcnt_bt->num_free_blocks) -
-			2);
+			2;
+		if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+			blks += rmap_bt->num_tot_blocks - rmap_bt->num_free_blocks - 1;
+		agf->agf_btreeblks = cpu_to_be32(blks);
 #ifdef XR_BLD_FREE_TRACE
 		fprintf(stderr, "agf->agf_btreeblks = %u\n",
 				be32_to_cpu(agf->agf_btreeblks));
@@ -1586,6 +1948,7 @@ phase5_func(
 	bt_status_t	bcnt_btree_curs;
 	bt_status_t	ino_btree_curs;
 	bt_status_t	fino_btree_curs;
+	bt_status_t	rmap_btree_curs;
 	int		extra_blocks = 0;
 	uint		num_freeblocks;
 	xfs_extlen_t	freeblks1;
@@ -1641,6 +2004,12 @@ phase5_func(
 		sb_icount_ag[agno] += num_inos;
 		sb_ifree_ag[agno] += num_free_inos;
 
+		/*
+		 * Set up the btree cursors for the on-disk rmap btrees,
+		 * which includes pre-allocating all required blocks.
+		 */
+		init_rmapbt_cursor(mp, agno, &rmap_btree_curs);
+
 		num_extents = count_bno_extents_blocks(agno, &num_freeblocks);
 		/*
 		 * lose two blocks per AG -- the space tree roots
@@ -1725,11 +2094,19 @@ phase5_func(
 
 		ASSERT(freeblks1 == freeblks2);
 
+		if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+			build_rmap_tree(mp, agno, &rmap_btree_curs);
+			write_cursor(&rmap_btree_curs);
+			sb_fdblocks_ag[agno] += (rmap_btree_curs.num_tot_blocks -
+					rmap_btree_curs.num_free_blocks) - 1;
+		}
+
 		/*
 		 * set up agf and agfl
 		 */
 		build_agf_agfl(mp, agno, &bno_btree_curs,
-				&bcnt_btree_curs, freeblks1, extra_blocks);
+				&bcnt_btree_curs, freeblks1, extra_blocks,
+				&rmap_btree_curs);
 		/*
 		 * build inode allocation tree.
 		 */
@@ -1758,6 +2135,8 @@ phase5_func(
 		 */
 		finish_cursor(&bno_btree_curs);
 		finish_cursor(&ino_btree_curs);
+		if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+			finish_cursor(&rmap_btree_curs);
 		if (xfs_sb_version_hasfinobt(&mp->m_sb))
 			finish_cursor(&fino_btree_curs);
 		finish_cursor(&bcnt_btree_curs);

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs



[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux