[PATCH] xfs_repair: multithread phase 2

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Dave Chinner <dchinner@xxxxxxxxxx>

Running some recent repair tests on a broken filesystem meant running
phase 1 and 2 repeatedly to reproduce an issue at the start of phase
3. Phase 2 was taking approximately 10 minutes to run as it
processes each AG serially.

Phase 2 can be trivially parallelised - it is simply scanning the
per AG trees to calculate free block counts and free and used inode
counts. This can be done safely in parallel by giving each AG its
own structure to aggregate counts into, then once the AG scan is
complete adding them all together.

This patch uses 32-way threading which results in no noticeable
slowdown on single SATA drives with NCQ, but results in ~10x
reduction in runtime on a 12 disk RAID-0 array.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 repair/phase2.c |   16 +---
 repair/scan.c   |  303 +++++++++++++++++++++++++++++++-----------------------
 repair/scan.h   |   37 -------
 3 files changed, 176 insertions(+), 180 deletions(-)

diff --git a/repair/phase2.c b/repair/phase2.c
index e81ebf0..2f22c51 100644
--- a/repair/phase2.c
+++ b/repair/phase2.c
@@ -26,8 +26,7 @@
 #include "progress.h"
 
 void	set_mp(xfs_mount_t *mpp);
-void	scan_ag(xfs_agnumber_t agno);
-void	validate_sb(struct xfs_sb *sb);
+void	scan_ags(struct xfs_mount *mp);
 
 /* workaround craziness in the xlog routines */
 int xlog_recover_do_trans(xlog_t *log, xlog_recover_t *t, int p) { return 0; }
@@ -109,7 +108,6 @@ zero_log(xfs_mount_t *mp)
 void
 phase2(xfs_mount_t *mp)
 {
-	xfs_agnumber_t		i;
 	int			j;
 	ino_tree_node_t		*ino_rec;
 
@@ -138,17 +136,7 @@ phase2(xfs_mount_t *mp)
 
 	set_progress_msg(PROG_FMT_SCAN_AG, (__uint64_t) glob_agcount);
 
-	for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
-		scan_ag(i);
-#ifdef XR_INODE_TRACE
-		print_inode_list(i);
-#endif
-	}
-
-	/*
-	 * Validate that our manual counts match the superblock.
-	 */
-	validate_sb(&mp->m_sb);
+	scan_ags(mp);
 
 	print_final_rpt();
 
diff --git a/repair/scan.c b/repair/scan.c
index 85017ff..dd62776 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -35,27 +35,32 @@ extern int verify_set_agheader(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb,
 static xfs_mount_t	*mp = NULL;
 
 /*
- * Global variables to validate superblock values against the manual count
+ * Variables to validate AG header values against the manual count
  * from the btree traversal.
- *
- * No locking for now as phase2 is not threaded.
  */
-static __uint64_t	fdblocks;
-static __uint64_t	icount;
-static __uint64_t	ifreecount;
+struct aghdr_cnts {
+	xfs_agnumber_t	agno;
+	xfs_extlen_t	agffreeblks;
+	xfs_extlen_t	agflongest;
+	__uint64_t	agfbtreeblks;
+	__uint32_t	agicount;
+	__uint32_t	agifreecount;
+	__uint64_t	fdblocks;
+	__uint64_t	icount;
+	__uint64_t	ifreecount;
+};
+
+static void
+scanfunc_allocbt(
+	struct xfs_btree_block	*block,
+	int			level,
+	xfs_agblock_t		bno,
+	xfs_agnumber_t		agno,
+	int			suspect,
+	int			isroot,
+	__uint32_t		magic,
+	struct aghdr_cnts	*agcnts);
 
-/*
- * Global variables to validate AG header values against the manual count
- * from the btree traversal.
- *
- * Note: these values must be reset when processing a new AG, and for now
- * forces the AG scanning in phase2 to not be threaded.
- */
-static xfs_extlen_t	agffreeblks;
-static xfs_extlen_t	agflongest;
-static __uint64_t	agfbtreeblks;
-static __uint32_t	agicount;
-static __uint32_t	agifreecount;
 
 void
 set_mp(xfs_mount_t *mpp)
@@ -75,8 +80,10 @@ scan_sbtree(
 				xfs_agblock_t		bno,
 				xfs_agnumber_t		agno,
 				int			suspect,
-				int			isroot),
-	int		isroot)
+				int			isroot,
+				struct aghdr_cnts	*agcnts),
+	int		isroot,
+	struct aghdr_cnts *agcnts)
 {
 	xfs_buf_t	*bp;
 
@@ -86,7 +93,8 @@ scan_sbtree(
 		do_error(_("can't read btree block %d/%d\n"), agno, root);
 		return;
 	}
-	(*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1, root, agno, suspect, isroot);
+	(*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1, root, agno, suspect,
+							isroot, agcnts);
 	libxfs_putbuf(bp);
 }
 
@@ -469,6 +477,34 @@ _("out-of-order bmap key (file offset) in inode %llu, %s fork, fsbno %llu\n"),
 }
 
 void
+scanfunc_bno(
+	struct xfs_btree_block	*block,
+	int			level,
+	xfs_agblock_t		bno,
+	xfs_agnumber_t		agno,
+	int			suspect,
+	int			isroot,
+	struct aghdr_cnts	*agcnts)
+{
+	return scanfunc_allocbt(block, level, bno, agno,
+				suspect, isroot, XFS_ABTB_MAGIC, agcnts);
+}
+
+void
+scanfunc_cnt(
+	struct xfs_btree_block	*block,
+	int			level,
+	xfs_agblock_t		bno,
+	xfs_agnumber_t		agno,
+	int			suspect,
+	int			isroot,
+	struct aghdr_cnts	*agcnts)
+{
+	return scanfunc_allocbt(block, level, bno, agno,
+				suspect, isroot, XFS_ABTC_MAGIC, agcnts);
+}
+
+void
 scanfunc_allocbt(
 	struct xfs_btree_block	*block,
 	int			level,
@@ -476,7 +512,8 @@ scanfunc_allocbt(
 	xfs_agnumber_t		agno,
 	int			suspect,
 	int			isroot,
-	__uint32_t		magic)
+	__uint32_t		magic,
+	struct aghdr_cnts	*agcnts)
 {
 	const char 		*name;
 	int			i;
@@ -506,8 +543,8 @@ scanfunc_allocbt(
 	 * free data block counter.
 	 */
 	if (!isroot) {
-		agfbtreeblks++;
-		fdblocks++;
+		agcnts->agfbtreeblks++;
+		agcnts->fdblocks++;
 	}
 
 	if (be16_to_cpu(block->bb_level) != level) {
@@ -583,10 +620,10 @@ _("%s freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
 					lastblock = b;
 				}
 			} else {
-				fdblocks += len;
-				agffreeblks += len;
-				if (len > agflongest)
-					agflongest = len;
+				agcnts->fdblocks += len;
+				agcnts->agffreeblks += len;
+				if (len > agcnts->agflongest)
+					agcnts->agflongest = len;
 				if (len < lastcount) {
 					do_warn(_(
 	"out-of-order cnt btree record %d (%u %u) block %u/%u\n"),
@@ -670,38 +707,12 @@ _("%s freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
 		if (bno != 0 && verify_agbno(mp, agno, bno)) {
 			scan_sbtree(bno, level, agno, suspect,
 				    (magic == XFS_ABTB_MAGIC) ?
-				     scanfunc_bno : scanfunc_cnt, 0);
+				     scanfunc_bno : scanfunc_cnt, 0,
+				     agcnts);
 		}
 	}
 }
 
-void
-scanfunc_bno(
-	struct xfs_btree_block	*block,
-	int			level,
-	xfs_agblock_t		bno,
-	xfs_agnumber_t		agno,
-	int			suspect,
-	int			isroot)
-{
-	return scanfunc_allocbt(block, level, bno, agno,
-				suspect, isroot, XFS_ABTB_MAGIC);
-}
-
-void
-scanfunc_cnt(
-	struct xfs_btree_block	*block,
-	int			level,
-	xfs_agblock_t		bno,
-	xfs_agnumber_t		agno,
-	int			suspect,
-	int			isroot
-	)
-{
-	return scanfunc_allocbt(block, level, bno, agno,
-				suspect, isroot, XFS_ABTC_MAGIC);
-}
-
 static int
 scan_single_ino_chunk(
 	xfs_agnumber_t		agno,
@@ -886,8 +897,8 @@ scanfunc_ino(
 	xfs_agblock_t		bno,
 	xfs_agnumber_t		agno,
 	int			suspect,
-	int			isroot
-	)
+	int			isroot,
+	struct aghdr_cnts	*agcnts)
 {
 	int			i;
 	int			numrecs;
@@ -968,10 +979,10 @@ _("inode btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
 		 * the block.  skip processing of bogus records.
 		 */
 		for (i = 0; i < numrecs; i++) {
-			agicount += XFS_INODES_PER_CHUNK;
-			icount += XFS_INODES_PER_CHUNK;
-			agifreecount += be32_to_cpu(rp[i].ir_freecount);
-			ifreecount += be32_to_cpu(rp[i].ir_freecount);
+			agcnts->agicount += XFS_INODES_PER_CHUNK;
+			agcnts->icount += XFS_INODES_PER_CHUNK;
+			agcnts->agifreecount += be32_to_cpu(rp[i].ir_freecount);
+			agcnts->ifreecount += be32_to_cpu(rp[i].ir_freecount);
 
 			suspect = scan_single_ino_chunk(agno, &rp[i], suspect);
 		}
@@ -1015,13 +1026,14 @@ _("inode btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
 		if (be32_to_cpu(pp[i]) != 0 && verify_agbno(mp, agno,
 							be32_to_cpu(pp[i])))
 			scan_sbtree(be32_to_cpu(pp[i]), level, agno,
-					suspect, scanfunc_ino, 0);
+					suspect, scanfunc_ino, 0, agcnts);
 	}
 }
 
 void
 scan_freelist(
-	xfs_agf_t	*agf)
+	xfs_agf_t	*agf,
+	struct aghdr_cnts *agcnts)
 {
 	xfs_agfl_t	*agfl;
 	xfs_buf_t	*agflbuf;
@@ -1068,7 +1080,7 @@ scan_freelist(
 			be32_to_cpu(agf->agf_flcount), agno);
 	}
 
-	fdblocks += count;
+	agcnts->fdblocks += count;
 
 	libxfs_putbuf(agflbuf);
 }
@@ -1076,14 +1088,15 @@ scan_freelist(
 static void
 validate_agf(
 	struct xfs_agf		*agf,
-	xfs_agnumber_t		agno)
+	xfs_agnumber_t		agno,
+	struct aghdr_cnts	*agcnts)
 {
 	xfs_agblock_t		bno;
 
 	bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
 	if (bno != 0 && verify_agbno(mp, agno, bno)) {
 		scan_sbtree(bno, be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
-			    agno, 0, scanfunc_bno, 1);
+			    agno, 0, scanfunc_bno, 1, agcnts);
 	} else {
 		do_warn(_("bad agbno %u for btbno root, agno %d\n"),
 			bno, agno);
@@ -1092,33 +1105,34 @@ validate_agf(
 	bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
 	if (bno != 0 && verify_agbno(mp, agno, bno)) {
 		scan_sbtree(bno, be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
-			    agno, 0, scanfunc_cnt, 1);
+			    agno, 0, scanfunc_cnt, 1, agcnts);
 	} else  {
 		do_warn(_("bad agbno %u for btbcnt root, agno %d\n"),
 			bno, agno);
 	}
 
-	if (be32_to_cpu(agf->agf_freeblks) != agffreeblks) {
+	if (be32_to_cpu(agf->agf_freeblks) != agcnts->agffreeblks) {
 		do_warn(_("agf_freeblks %u, counted %u in ag %u\n"),
-			be32_to_cpu(agf->agf_freeblks), agffreeblks, agno);
+			be32_to_cpu(agf->agf_freeblks), agcnts->agffreeblks, agno);
 	}
 
-	if (be32_to_cpu(agf->agf_longest) != agflongest) {
+	if (be32_to_cpu(agf->agf_longest) != agcnts->agflongest) {
 		do_warn(_("agf_longest %u, counted %u in ag %u\n"),
-			be32_to_cpu(agf->agf_longest), agflongest, agno);
+			be32_to_cpu(agf->agf_longest), agcnts->agflongest, agno);
 	}
 
 	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
-	    be32_to_cpu(agf->agf_btreeblks) != agfbtreeblks) {
+	    be32_to_cpu(agf->agf_btreeblks) != agcnts->agfbtreeblks) {
 		do_warn(_("agf_btreeblks %u, counted %u in ag %u\n"),
-			be32_to_cpu(agf->agf_btreeblks), agfbtreeblks, agno);
+			be32_to_cpu(agf->agf_btreeblks), agcnts->agfbtreeblks, agno);
 	}
 }
 
 static void
 validate_agi(
 	struct xfs_agi		*agi,
-	xfs_agnumber_t		agno)
+	xfs_agnumber_t		agno,
+	struct aghdr_cnts	*agcnts)
 {
 	xfs_agblock_t		bno;
 	int			i;
@@ -1126,20 +1140,20 @@ validate_agi(
 	bno = be32_to_cpu(agi->agi_root);
 	if (bno != 0 && verify_agbno(mp, agno, bno)) {
 		scan_sbtree(bno, be32_to_cpu(agi->agi_level),
-			    agno, 0, scanfunc_ino, 1);
+			    agno, 0, scanfunc_ino, 1, agcnts);
 	} else {
 		do_warn(_("bad agbno %u for inobt root, agno %d\n"),
 			be32_to_cpu(agi->agi_root), agno);
 	}
 
-	if (be32_to_cpu(agi->agi_count) != agicount) {
+	if (be32_to_cpu(agi->agi_count) != agcnts->agicount) {
 		do_warn(_("agi_count %u, counted %u in ag %u\n"),
-			 be32_to_cpu(agi->agi_count), agicount, agno);
+			 be32_to_cpu(agi->agi_count), agcnts->agicount, agno);
 	}
 
-	if (be32_to_cpu(agi->agi_freecount) != agifreecount) {
+	if (be32_to_cpu(agi->agi_freecount) != agcnts->agifreecount) {
 		do_warn(_("agi_freecount %u, counted %u in ag %u\n"),
-			be32_to_cpu(agi->agi_freecount), agifreecount, agno);
+			be32_to_cpu(agi->agi_freecount), agcnts->agifreecount, agno);
 	}
 
 	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
@@ -1155,42 +1169,15 @@ validate_agi(
 }
 
 /*
- * Validate block/inode counts in the superblock.
- *
- * Note: needs to be called after scan_ag() has been called for all
- * allocation groups.
- */
-void
-validate_sb(
-	struct xfs_sb		*sb)
-{
-	if (sb->sb_icount != icount) {
-		do_warn(_("sb_icount %lld, counted %lld\n"),
-			sb->sb_icount, icount);
-	}
-
-	if (sb->sb_ifree != ifreecount) {
-		do_warn(_("sb_ifree %lld, counted %lld\n"),
-			sb->sb_ifree, ifreecount);
-	}
-
-	if (sb->sb_fdblocks != fdblocks) {
-		do_warn(_("sb_fdblocks %lld, counted %lld\n"),
-			sb->sb_fdblocks, fdblocks);
-	}
-
-	/* XXX: check sb_frextents */
-}
-
-/*
  * Scan an AG for obvious corruption.
  *
  * Note: This code is not reentrant due to the use of global variables.
  */
-void
-scan_ag(
-	xfs_agnumber_t	agno)
+void *
+scan_ag(void *args)
 {
+	struct aghdr_cnts *agcnts = args;
+	xfs_agnumber_t	agno = agcnts->agno;
 	xfs_agf_t	*agf;
 	xfs_buf_t	*agfbuf;
 	int		agf_dirty = 0;
@@ -1202,28 +1189,18 @@ scan_ag(
 	int		sb_dirty = 0;
 	int		status;
 
-	/*
-	 * Reset the global variables to track the AG header validity.
-	 *
-	 * Because we use global variable but can get called multiple times
-	 * we have to make sure to always reset these variables.
-	 */
-	agicount = agifreecount = 0;
-	agffreeblks = agfbtreeblks = 0;
-	agflongest = 0;
-
 	sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
 				XFS_FSS_TO_BB(mp, 1), 0);
 	if (!sbbuf)  {
 		do_error(_("can't get root superblock for ag %d\n"), agno);
-		return;
+		return NULL;
 	}
 
 	sb = (xfs_sb_t *)calloc(BBSIZE, 1);
 	if (!sb) {
 		do_error(_("can't allocate memory for superblock\n"));
 		libxfs_putbuf(sbbuf);
-		return;
+		return NULL;
 	}
 	libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbbuf));
 
@@ -1234,7 +1211,7 @@ scan_ag(
 		do_error(_("can't read agf block for ag %d\n"), agno);
 		libxfs_putbuf(sbbuf);
 		free(sb);
-		return;
+		return NULL;
 	}
 	agf = XFS_BUF_TO_AGF(agfbuf);
 
@@ -1246,7 +1223,7 @@ scan_ag(
 		libxfs_putbuf(agfbuf);
 		libxfs_putbuf(sbbuf);
 		free(sb);
-		return;
+		return NULL;
 	}
 	agi = XFS_BUF_TO_AGI(agibuf);
 
@@ -1298,13 +1275,13 @@ scan_ag(
 		do_warn(_("bad uncorrected agheader %d, skipping ag...\n"),
 			agno);
 
-		return;
+		return NULL;
 	}
 
-	scan_freelist(agf);
+	scan_freelist(agf, agcnts);
 
-	validate_agf(agf, agno);
-	validate_agi(agi, agno);
+	validate_agf(agf, agno, agcnts);
+	validate_agi(agi, agno, agcnts);
 
 	ASSERT(agi_dirty == 0 || (agi_dirty && !no_modify));
 
@@ -1331,4 +1308,72 @@ scan_ag(
 		libxfs_putbuf(sbbuf);
 	free(sb);
 	PROG_RPT_INC(prog_rpt_done[agno], 1);
+
+#ifdef XR_INODE_TRACE
+	print_inode_list(i);
+#endif
+	return NULL;
+}
+
+#define SCAN_THREADS 32
+
+void
+scan_ags(
+	struct xfs_mount	*mp)
+{
+	struct aghdr_cnts agcnts[mp->m_sb.sb_agcount];
+	pthread_t	thr[SCAN_THREADS];
+	__uint64_t	fdblocks = 0;
+	__uint64_t	icount = 0;
+	__uint64_t	ifreecount = 0;
+	int		i, j, err;
+
+	/*
+	 * scan a few AGs in parallel. The scan is IO latency bound,
+	 * so running a few at a time will speed it up significantly.
+	 */
+	for (i = 0; i < mp->m_sb.sb_agcount; i += SCAN_THREADS) {
+		for (j = 0; j < SCAN_THREADS; j++) {
+			if (i + j >= mp->m_sb.sb_agcount)
+				break;
+			memset(&agcnts[i + j], 0, sizeof(agcnts[i]));
+			agcnts[i + j].agno = i + j;
+			err = pthread_create(&thr[j], NULL, scan_ag,
+							&agcnts[i + j]);
+			if (err)
+				do_abort(_("pthread_create failed in scan_ags\n"));
+		}
+		for (j = 0; j < SCAN_THREADS; j++) {
+			if (i + j >= mp->m_sb.sb_agcount)
+				break;
+			pthread_join(thr[j], NULL);
+		}
+	}
+
+	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+		fdblocks += agcnts[i].fdblocks;
+		icount += agcnts[i].icount;
+		ifreecount += agcnts[i].ifreecount;
+	}
+
+	/*
+	 * Validate that our manual counts match the superblock.
+	 */
+	if (mp->m_sb.sb_icount != icount) {
+		do_warn(_("sb_icount %lld, counted %lld\n"),
+			mp->m_sb.sb_icount, icount);
+	}
+
+	if (mp->m_sb.sb_ifree != ifreecount) {
+		do_warn(_("sb_ifree %lld, counted %lld\n"),
+			mp->m_sb.sb_ifree, ifreecount);
+	}
+
+	if (mp->m_sb.sb_fdblocks != fdblocks) {
+		do_warn(_("sb_fdblocks %lld, counted %lld\n"),
+			mp->m_sb.sb_fdblocks, fdblocks);
+	}
+
+	/* XXX: check sb_frextents */
 }
+
diff --git a/repair/scan.h b/repair/scan.h
index 20567fb..4de1cef 100644
--- a/repair/scan.h
+++ b/repair/scan.h
@@ -20,19 +20,6 @@
 
 struct blkmap;
 
-void scan_sbtree(
-	xfs_agblock_t	root,
-	int		nlevels,
-	xfs_agnumber_t	agno,
-	int		suspect,
-	void		(*func)(struct xfs_btree_block	*block,
-				int			level,
-				xfs_agblock_t		bno,
-				xfs_agnumber_t		agno,
-				int			suspect,
-				int			isroot),
-	int		isroot);
-
 int scan_lbtree(
 	xfs_dfsbno_t	root,
 	int		nlevels,
@@ -74,29 +61,5 @@ int scanfunc_bmap(
 	int			check_dups,
 	int			*dirty);
 
-void scanfunc_bno(
-	struct xfs_btree_block	*block,
-	int			level,
-	xfs_agblock_t		bno,
-	xfs_agnumber_t		agno,
-	int			suspect,
-	int			isroot);
-
-void scanfunc_cnt(
-	struct xfs_btree_block	*block,
-	int			level,
-	xfs_agblock_t		bno,
-	xfs_agnumber_t		agno,
-	int			suspect,
-	int			isroot);
-
-void
-scanfunc_ino(
-	struct xfs_btree_block	*block,
-	int			level,
-	xfs_agblock_t		bno,
-	xfs_agnumber_t		agno,
-	int			suspect,
-	int			isroot);
 
 #endif /* _XR_SCAN_H */
-- 
1.7.2.3

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs


[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux