[PATCH 056/145] xfs_repair: use rmap btree data to check block types

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Dave Chinner <dchinner@xxxxxxxxxx>

Use the rmap btree to pre-populate the per-block type information so
that when repair iterates the primary metadata, it can confirm each
block's type against what the rmap btree recorded.

Ensure that we remove the flag bits from blockcount before using the
length field.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
[darrick.wong@xxxxxxxxxx: split patch, strip flag bits from blockcount]
Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 repair/dinode.c     |    6 +
 repair/incore.h     |   16 +-
 repair/scan.c       |  356 ++++++++++++++++++++++++++++++++++++++++++++++++---
 repair/xfs_repair.c |    2 
 4 files changed, 351 insertions(+), 29 deletions(-)


diff --git a/repair/dinode.c b/repair/dinode.c
index cbd4305..c1e60ff 100644
--- a/repair/dinode.c
+++ b/repair/dinode.c
@@ -744,6 +744,7 @@ _("%s fork in ino %" PRIu64 " claims dup extent, "
 _("%s fork in ino %" PRIu64 " claims free block %" PRIu64 "\n"),
 					forkname, ino, (__uint64_t) b);
 				/* fall through ... */
+			case XR_E_INUSE1:	/* seen by rmap */
 			case XR_E_UNKNOWN:
 				set_bmap_ext(agno, agbno, blen, XR_E_INUSE);
 				break;
@@ -751,6 +752,11 @@ _("%s fork in ino %" PRIu64 " claims free block %" PRIu64 "\n"),
 			case XR_E_BAD_STATE:
 				do_error(_("bad state in block map %" PRIu64 "\n"), b);
 
+			case XR_E_FS_MAP1:
+			case XR_E_INO1:
+			case XR_E_INUSE_FS1:
+				do_warn(_("rmap claims metadata use!\n"));
+				/* fall through */
 			case XR_E_FS_MAP:
 			case XR_E_INO:
 			case XR_E_INUSE_FS:
diff --git a/repair/incore.h b/repair/incore.h
index c92475e..bc0810b 100644
--- a/repair/incore.h
+++ b/repair/incore.h
@@ -102,17 +102,11 @@ typedef struct rt_extent_tree_node  {
 #define XR_E_MULT	5	/* extent is multiply referenced */
 #define XR_E_INO	6	/* extent used by inodes (inode blocks) */
 #define XR_E_FS_MAP	7	/* extent used by fs space/inode maps */
-#define XR_E_BAD_STATE	8
-
-/* extent states, in 64 bit word chunks */
-#define	XR_E_UNKNOWN_LL		0x0000000000000000LL
-#define	XR_E_FREE1_LL		0x1111111111111111LL
-#define	XR_E_FREE_LL		0x2222222222222222LL
-#define	XR_E_INUSE_LL		0x3333333333333333LL
-#define	XR_E_INUSE_FS_LL	0x4444444444444444LL
-#define	XR_E_MULT_LL		0x5555555555555555LL
-#define	XR_E_INO_LL		0x6666666666666666LL
-#define	XR_E_FS_MAP_LL		0x7777777777777777LL
+#define XR_E_INUSE1	8	/* used block (marked by rmap btree) */
+#define XR_E_INUSE_FS1	9	/* used by fs ag header or log (rmap btree) */
+#define XR_E_INO1	10	/* used by inodes (marked by rmap btree) */
+#define XR_E_FS_MAP1	11	/* used by fs space/inode maps (rmap btree) */
+#define XR_E_BAD_STATE	12
 
 /* separate state bit, OR'ed into high (4th) bit of ex_state field */
 
diff --git a/repair/scan.c b/repair/scan.c
index 964ff06..eb23685 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -44,6 +44,7 @@ struct aghdr_cnts {
 	__uint32_t	agicount;
 	__uint32_t	agifreecount;
 	__uint64_t	fdblocks;
+	__uint64_t	usedblocks;
 	__uint64_t	ifreecount;
 	__uint32_t	fibtfreecount;
 };
@@ -308,6 +309,13 @@ _("bad back (left) sibling pointer (saw %llu should be NULL (0))\n"
 		pthread_mutex_lock(&ag_locks[agno].lock);
 		state = get_bmap(agno, agbno);
 		switch (state) {
+		case XR_E_INUSE1:
+			/*
+			 * block was claimed as in use data by the rmap
+			 * btree, but has not been found in the data extent
+			 * map for the inode. That means this bmbt block hasn't
+			 * yet been claimed as in use, which means -it's ours-
+			 */
 		case XR_E_UNKNOWN:
 		case XR_E_FREE1:
 		case XR_E_FREE:
@@ -764,6 +772,272 @@ ino_issparse(
 	return xfs_inobt_is_sparse_disk(rp, offset);
 }
 
+static void
+scan_rmapbt(
+	struct xfs_btree_block	*block,
+	int			level,
+	xfs_agblock_t		bno,
+	xfs_agnumber_t		agno,
+	int			suspect,
+	int			isroot,
+	__uint32_t		magic,
+	void			*priv)
+{
+	struct aghdr_cnts	*agcnts = priv;
+	const char		*name = "rmap";
+	int			i;
+	xfs_rmap_ptr_t		*pp;
+	struct xfs_rmap_rec	*rp;
+	int			hdr_errors = 0;
+	int			numrecs;
+	int			state;
+	xfs_agblock_t		lastblock = 0;
+	int64_t			lastowner = 0;
+	int64_t			lastoffset = 0;
+
+	if (magic != XFS_RMAP_CRC_MAGIC) {
+		name = "(unknown)";
+		assert(0);
+	}
+
+	if (be32_to_cpu(block->bb_magic) != magic) {
+		do_warn(_("bad magic # %#x in bt%s block %d/%d\n"),
+			be32_to_cpu(block->bb_magic), name, agno, bno);
+		hdr_errors++;
+		if (suspect)
+			return;
+	}
+
+	/*
+	 * All RMAP btree blocks except the roots are freed for a
+	 * fully empty filesystem, thus they are counted towards the
+	 * free data block counter.
+	 */
+	if (!isroot) {
+		agcnts->agfbtreeblks++;
+		agcnts->fdblocks++;
+	}
+
+	if (be16_to_cpu(block->bb_level) != level) {
+		do_warn(_("expected level %d got %d in bt%s block %d/%d\n"),
+			level, be16_to_cpu(block->bb_level), name, agno, bno);
+		hdr_errors++;
+		if (suspect)
+			return;
+	}
+
+	/* check for btree blocks multiply claimed */
+	state = get_bmap(agno, bno);
+	if (!(state == XR_E_UNKNOWN || state == XR_E_FS_MAP1))  {
+		set_bmap(agno, bno, XR_E_MULT);
+		do_warn(
+_("%s rmap btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
+				name, state, agno, bno, suspect);
+		return;
+	}
+	set_bmap(agno, bno, XR_E_FS_MAP);
+
+	numrecs = be16_to_cpu(block->bb_numrecs);
+	if (level == 0) {
+		if (numrecs > mp->m_rmap_mxr[0])  {
+			numrecs = mp->m_rmap_mxr[0];
+			hdr_errors++;
+		}
+		if (isroot == 0 && numrecs < mp->m_rmap_mnr[0])  {
+			numrecs = mp->m_rmap_mnr[0];
+			hdr_errors++;
+		}
+
+		if (hdr_errors) {
+			do_warn(
+	_("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
+				be16_to_cpu(block->bb_numrecs),
+				mp->m_rmap_mnr[0], mp->m_rmap_mxr[0],
+				name, agno, bno);
+			suspect++;
+		}
+
+		rp = XFS_RMAP_REC_ADDR(block, 1);
+		for (i = 0; i < numrecs; i++) {
+			xfs_agblock_t		b, end;
+			xfs_extlen_t		len, blen;
+			int64_t			owner, offset;
+
+			b = be32_to_cpu(rp[i].rm_startblock);
+			len = be32_to_cpu(rp[i].rm_blockcount);
+			owner = be64_to_cpu(rp[i].rm_owner);
+			offset = be64_to_cpu(rp[i].rm_offset);
+			end = b + len;
+
+			/* Make sure agbno & len make sense. */
+			if (!verify_agbno(mp, agno, b)) {
+				do_warn(
+	_("invalid start block %u in record %u of %s btree block %u/%u\n"),
+					b, i, name, agno, bno);
+				continue;
+			}
+			if (len == 0 || !verify_agbno(mp, agno, end - 1)) {
+				do_warn(
+	_("invalid length %u in record %u of %s btree block %u/%u\n"),
+					len, i, name, agno, bno);
+				continue;
+			}
+
+			/* Look for impossible owners. */
+			if (!(owner > 0 || (owner > XFS_RMAP_OWN_MIN &&
+					    owner <= XFS_RMAP_OWN_FS)))
+				do_warn(
+	_("invalid owner in rmap btree record %d (%"PRId64" %u) block %u/%u\n"),
+						i, owner, len, agno, bno);
+
+			/* Check for out of order records. */
+			if (i == 0) {
+advance:
+				lastblock = b;
+				lastowner = owner;
+				lastoffset = offset;
+			} else {
+				bool bad;
+
+				bad = b <= lastblock;
+				if (bad)
+					do_warn(
+	_("out-of-order rmap btree record %d (%u %"PRId64" %"PRIx64" %u) block %u/%u\n"),
+					i, b, owner, offset, len, agno, bno);
+				else
+					goto advance;
+			}
+
+			/* Check for block owner collisions. */
+			for ( ; b < end; b += blen)  {
+				state = get_bmap_ext(agno, b, end, &blen);
+				switch (state) {
+				case XR_E_UNKNOWN:
+					switch (owner) {
+					case XFS_RMAP_OWN_FS:
+					case XFS_RMAP_OWN_LOG:
+						set_bmap(agno, b, XR_E_INUSE_FS1);
+						break;
+					case XFS_RMAP_OWN_AG:
+					case XFS_RMAP_OWN_INOBT:
+						set_bmap(agno, b, XR_E_FS_MAP1);
+						break;
+					case XFS_RMAP_OWN_INODES:
+						set_bmap(agno, b, XR_E_INO1);
+						break;
+					case XFS_RMAP_OWN_NULL:
+						/* still unknown */
+						break;
+					default:
+						/* file data */
+						set_bmap(agno, b, XR_E_INUSE1);
+						break;
+					}
+					break;
+				case XR_E_INUSE_FS:
+					if (owner == XFS_RMAP_OWN_FS ||
+					    owner == XFS_RMAP_OWN_LOG)
+						break;
+					do_warn(
+_("Static meta block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+						agno, b, b + blen - 1,
+						name, state, owner);
+					break;
+				case XR_E_FS_MAP:
+					if (owner == XFS_RMAP_OWN_AG ||
+					    owner == XFS_RMAP_OWN_INOBT)
+						break;
+					do_warn(
+_("AG meta block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+						agno, b, b + blen - 1,
+						name, state, owner);
+					break;
+				case XR_E_INO:
+					if (owner == XFS_RMAP_OWN_INODES)
+						break;
+					do_warn(
+_("inode block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+						agno, b, b + blen - 1,
+						name, state, owner);
+					break;
+				case XR_E_INUSE:
+					if (owner >= 0 &&
+					    owner < mp->m_sb.sb_dblocks)
+						break;
+					do_warn(
+_("in use block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+						agno, b, b + blen - 1,
+						name, state, owner);
+					break;
+				case XR_E_FREE1:
+				case XR_E_FREE:
+					/*
+					 * May be on the AGFL. If not, they'll
+					 * be caught later.
+					 */
+					break;
+				default:
+					do_warn(
+_("unknown block (%d,%d-%d) mismatch on %s tree, state - %d,%" PRIx64 "\n"),
+						agno, b, b + blen - 1,
+						name, state, owner);
+					break;
+				}
+			}
+		}
+		return;
+	}
+
+	/*
+	 * interior record
+	 */
+	pp = XFS_RMAP_PTR_ADDR(block, 1, mp->m_rmap_mxr[1]);
+
+	if (numrecs > mp->m_rmap_mxr[1])  {
+		numrecs = mp->m_rmap_mxr[1];
+		hdr_errors++;
+	}
+	if (isroot == 0 && numrecs < mp->m_rmap_mnr[1])  {
+		numrecs = mp->m_rmap_mnr[1];
+		hdr_errors++;
+	}
+
+	/*
+	 * don't pass bogus tree flag down further if this block
+	 * looked ok.  bail out if two levels in a row look bad.
+	 */
+	if (hdr_errors)  {
+		do_warn(
+	_("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
+			be16_to_cpu(block->bb_numrecs),
+			mp->m_rmap_mnr[1], mp->m_rmap_mxr[1],
+			name, agno, bno);
+		if (suspect)
+			return;
+		suspect++;
+	} else if (suspect) {
+		suspect = 0;
+	}
+
+	for (i = 0; i < numrecs; i++)  {
+		xfs_agblock_t		bno = be32_to_cpu(pp[i]);
+
+		/*
+		 * XXX - put sibling detection right here.
+		 * we know our sibling chain is good.  So as we go,
+		 * we check the entry before and after each entry.
+		 * If either of the entries references a different block,
+		 * check the sibling pointer.  If there's a sibling
+		 * pointer mismatch, try and extract as much data
+		 * as possible.
+		 */
+		if (bno != 0 && verify_agbno(mp, agno, bno)) {
+			scan_sbtree(bno, level, agno, suspect, scan_rmapbt, 0,
+				    magic, priv, &xfs_rmapbt_buf_ops);
+		}
+	}
+}
+
 /*
  * The following helpers are to help process and validate individual on-disk
  * inode btree records. We have two possible inode btrees with slightly
@@ -976,20 +1250,27 @@ scan_single_ino_chunk(
 
 			agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
 			state = get_bmap(agno, agbno);
-			if (state == XR_E_UNKNOWN)  {
-				set_bmap(agno, agbno, XR_E_INO);
-			} else if (state == XR_E_INUSE_FS && agno == 0 &&
-				   ino + j >= first_prealloc_ino &&
-				   ino + j < last_prealloc_ino)  {
+			switch (state) {
+			case XR_E_INO:
+				break;
+			case XR_E_UNKNOWN:
+			case XR_E_INO1:	/* seen by rmap */
 				set_bmap(agno, agbno, XR_E_INO);
-			} else  {
+				break;
+			case XR_E_INUSE_FS:
+			case XR_E_INUSE_FS1:
+				if (agno == 0 &&
+				    ino + j >= first_prealloc_ino &&
+				    ino + j < last_prealloc_ino) {
+					set_bmap(agno, agbno, XR_E_INO);
+					break;
+				}
+				/* fall through */
+			default:
+				/* XXX - maybe should mark block a duplicate */
 				do_warn(
 _("inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n"),
 					agno, agbno, mp->m_sb.sb_inopblock);
-				/*
-				 * XXX - maybe should mark
-				 * block a duplicate
-				 */
 				return ++suspect;
 			}
 		}
@@ -1099,19 +1380,35 @@ _("sparse inode chunk claims inode block, finobt block - agno %d, bno %d, inopb
 				continue;
 			}
 
-			if (state == XR_E_INO) {
-				continue;
-			} else if ((state == XR_E_UNKNOWN) ||
-				   (state == XR_E_INUSE_FS && agno == 0 &&
-				    ino + j >= first_prealloc_ino &&
-				    ino + j < last_prealloc_ino)) {
+			switch (state) {
+			case XR_E_INO:
+				break;
+			case XR_E_INO1:	/* seen by rmap */
+				set_bmap(agno, agbno, XR_E_INO);
+				break;
+			case XR_E_UNKNOWN:
 				do_warn(
 _("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb %d\n"),
 					agno, agbno, mp->m_sb.sb_inopblock);
 
 				set_bmap(agno, agbno, XR_E_INO);
 				suspect++;
-			} else {
+				break;
+			case XR_E_INUSE_FS:
+			case XR_E_INUSE_FS1:
+				if (agno == 0 &&
+				    ino + j >= first_prealloc_ino &&
+				    ino + j < last_prealloc_ino) {
+					do_warn(
+_("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb %d\n"),
+						agno, agbno, mp->m_sb.sb_inopblock);
+
+					set_bmap(agno, agbno, XR_E_INO);
+					suspect++;
+					break;
+				}
+				/* fall through */
+			default:
 				do_warn(
 _("inode chunk claims used block, finobt block - agno %d, bno %d, inopb %d\n"),
 					agno, agbno, mp->m_sb.sb_inopblock);
@@ -1280,6 +1577,7 @@ scan_inobt(
 	 */
 	state = get_bmap(agno, bno);
 	switch (state)  {
+	case XR_E_FS_MAP1: /* already been seen by an rmap scan */
 	case XR_E_UNKNOWN:
 	case XR_E_FREE1:
 	case XR_E_FREE:
@@ -1420,7 +1718,7 @@ scan_freelist(
 	if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
 	    XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
 	    XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
-		set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_FS_MAP);
+		set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_INUSE_FS);
 
 	if (be32_to_cpu(agf->agf_flcount) == 0)
 		return;
@@ -1505,6 +1803,19 @@ validate_agf(
 			bno, agno);
 	}
 
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+		bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]);
+		if (bno != 0 && verify_agbno(mp, agno, bno)) {
+			scan_sbtree(bno,
+				    be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]),
+				    agno, 0, scan_rmapbt, 1, XFS_RMAP_CRC_MAGIC,
+				    agcnts, &xfs_rmapbt_buf_ops);
+		} else  {
+			do_warn(_("bad agbno %u for rmapbt root, agno %d\n"),
+				bno, agno);
+		}
+	}
+
 	if (be32_to_cpu(agf->agf_freeblks) != agcnts->agffreeblks) {
 		do_warn(_("agf_freeblks %u, counted %u in ag %u\n"),
 			be32_to_cpu(agf->agf_freeblks), agcnts->agffreeblks, agno);
@@ -1520,6 +1831,7 @@ validate_agf(
 		do_warn(_("agf_btreeblks %u, counted %" PRIu64 " in ag %u\n"),
 			be32_to_cpu(agf->agf_btreeblks), agcnts->agfbtreeblks, agno);
 	}
+
 }
 
 static void
@@ -1759,6 +2071,7 @@ scan_ags(
 	__uint64_t	fdblocks = 0;
 	__uint64_t	icount = 0;
 	__uint64_t	ifreecount = 0;
+	__uint64_t	usedblocks = 0;
 	xfs_agnumber_t	i;
 	work_queue_t	wq;
 
@@ -1781,6 +2094,7 @@ scan_ags(
 		fdblocks += agcnts[i].fdblocks;
 		icount += agcnts[i].agicount;
 		ifreecount += agcnts[i].ifreecount;
+		usedblocks += agcnts[i].usedblocks;
 	}
 
 	free(agcnts);
@@ -1802,4 +2116,10 @@ scan_ags(
 		do_warn(_("sb_fdblocks %" PRIu64 ", counted %" PRIu64 "\n"),
 			mp->m_sb.sb_fdblocks, fdblocks);
 	}
+
+	if (usedblocks &&
+	    usedblocks != mp->m_sb.sb_dblocks - fdblocks) {
+		do_warn(_("used blocks %" PRIu64 ", counted %" PRIu64 "\n"),
+			mp->m_sb.sb_dblocks - fdblocks, usedblocks);
+	}
 }
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 9d91f2d..709c0c3 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -417,6 +417,8 @@ calc_mkfs(xfs_mount_t *mp)
 	fino_bno = inobt_root + (2 * min(2, mp->m_ag_maxlevels)) + 1;
 	if (xfs_sb_version_hasfinobt(&mp->m_sb))
 		fino_bno++;
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		fino_bno++;
 
 	/*
 	 * If the log is allocated in the first allocation group we need to

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs



[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux