[PATCH 06/10] xfs_repair: mark unreachable prefetched metadata blocks stale

"Darrick J. Wong" <darrick.wong@xxxxxxxxxx> · Fri, 14 Aug 2015 18:44:17 -0700

When we're running xfs_repair with prefetch enabled, it's possible
that repair will decide to clear an inode without examining all
metadata blocks owned by that inode.  This leaves the unreferenced
prefetched buffers marked UNCHECKED, which will cause a subsequent CRC
error if the block is reallocated to a different structure and read
more than once.  Typically this happens when a large directory is
corrupted and lost+found has to grow to accommodate all the
disconnected inodes.

Therefore, clear the UNCHECKED flag and set the STALE flag to get rid
of the CRC errors and ensure that the blocks aren't written back out
to disk without first being marked dirty.

Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 repair/phase4.c |    5 +++++
 repair/phase6.c |    4 ++++
 repair/scan.c   |   36 ++++++++++++++++++++++++++++++++++++
 repair/scan.h   |    2 ++
 4 files changed, 47 insertions(+)

diff --git a/repair/phase4.c b/repair/phase4.c
index e0571e8..13b2946 100644
--- a/repair/phase4.c
+++ b/repair/phase4.c
@@ -30,6 +30,7 @@
 #include "versions.h"
 #include "dir2.h"
 #include "progress.h"
+#include "scan.h"
 
 
 /*
@@ -300,6 +301,10 @@ phase4(xfs_mount_t *mp)
 	 * already in phase 3.
 	 */
 	process_ags(mp);
+
+	/* Mark stale anything we didn't get to. */
+	mark_unchecked_buffers_stale();
+
 	print_final_rpt();
 
 	/*
diff --git a/repair/phase6.c b/repair/phase6.c
index 467f119..5edaa30 100644
--- a/repair/phase6.c
+++ b/repair/phase6.c
@@ -29,6 +29,7 @@
 #include "dinode.h"
 #include "progress.h"
 #include "versions.h"
+#include "scan.h"
 
 static struct cred		zerocr;
 static struct fsxattr 		zerofsx;
@@ -3312,6 +3313,9 @@ _("        - resetting contents of realtime bitmap and summary inodes\n"));
 	 */
 	traverse_ags(mp);
 
+	/* Mark stale anything we didn't get to. */
+	mark_unchecked_buffers_stale();
+
 	/*
 	 * any directories that had updated ".." entries, rebuild them now
 	 */
diff --git a/repair/scan.c b/repair/scan.c
index 1e7a4da..431fd24 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -29,6 +29,7 @@
 #include "bmap.h"
 #include "progress.h"
 #include "threads.h"
+#include "cache.h"
 
 static xfs_mount_t	*mp = NULL;
 
@@ -1804,3 +1805,38 @@ scan_ags(
 	}
 }
 
+static void
+mark_buf_stale(
+	struct cache_node	*cn)
+{
+	struct xfs_buf		*buf = (struct xfs_buf *)cn;
+
+	/* Buffers not flagged UNCHECKED are left exactly as they are. */
+	if (!(buf->b_flags & LIBXFS_B_UNCHECKED))
+		return;
+	buf->b_flags = (buf->b_flags & ~LIBXFS_B_UNCHECKED) | LIBXFS_B_STALE;
+}
+
+/*
+ * Walk the libxfs buffer cache and mark unchecked buffers checked and stale.
+ *
+ * When prefetch is enabled, buffers will be marked unchecked if they fail
+ * verification.  Actually examining the block clears the unchecked flag, so
+ * any buffer still unchecked at the end of the examination represents an
+ * unreachable block.  A block that was reachable during prefetch but isn't
+ * by the end of the examination was owned by something that was freed as
+ * part of the exam, so the buffer can be considered free.  Set the stale
+ * flag so that getbuf and readbuf know to zero the buffer contents the next
+ * time the buffer is accessed.
+ *
+ * This also fixes the problem that repair reports CRC errors if the block is
+ * subsequently allocated to something else, reinitialized, and re-read.  This
+ * can happen if a directory with a corrupt dir3 leaf block is erased and the
+ * leaf block gets reused to grow lost+found during phase 7.
+ */
+void
+mark_unchecked_buffers_stale(void)
+{
+	cache_walk(libxfs_bcache, mark_buf_stale);
+}
+
diff --git a/repair/scan.h b/repair/scan.h
index ea8c0bf..d232a54 100644
--- a/repair/scan.h
+++ b/repair/scan.h
@@ -70,4 +70,6 @@ scan_ags(
 	struct xfs_mount	*mp,
 	int			scan_threads);
 
+void mark_unchecked_buffers_stale(void);
+
 #endif /* _XR_SCAN_H */

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs