[PATCH 09/23] xfs_scrub: one read/verify pool per disk

"Darrick J. Wong" <darrick.wong@xxxxxxxxxx> · Fri, 01 Mar 2019 15:27:50 -0800

From: Darrick J. Wong <darrick.wong@xxxxxxxxxx>

Simplify the read/verify pool code further by creating one pool per
disk.  This enables us to tailor the concurrency levels of each disk to
that specific disk so that if we have a mixed hdd/ssd environment we
don't flood the hdd with a lot of requests.

Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 scrub/phase6.c      |  110 ++++++++++++++++++++++++++++++++++++---------------
 scrub/read_verify.c |   29 ++++++-------
 scrub/read_verify.h |   10 +++--
 3 files changed, 98 insertions(+), 51 deletions(-)

diff --git a/scrub/phase6.c b/scrub/phase6.c
index fe121769..ccb795ab 100644
--- a/scrub/phase6.c
+++ b/scrub/phase6.c
@@ -33,18 +33,29 @@
  * and report the paths of the now corrupt files.
  */
 
+/* Verify disk blocks with GETFSMAP */
+
+struct xfs_verify_extent {
+	struct read_verify_pool	*rvp_data;
+	struct read_verify_pool	*rvp_log;
+	struct read_verify_pool	*rvp_realtime;
+	struct bitmap		*d_bad;		/* bytes */
+	struct bitmap		*r_bad;		/* bytes */
+};
+
 /* Find the fd for a given device identifier. */
-static struct disk *
-xfs_dev_to_disk(
-	struct scrub_ctx	*ctx,
-	dev_t			dev)
+static struct read_verify_pool *
+xfs_dev_to_pool(
+	struct scrub_ctx		*ctx,
+	struct xfs_verify_extent	*ve,
+	dev_t				dev)
 {
 	if (dev == ctx->fsinfo.fs_datadev)
-		return ctx->datadev;
+		return ve->rvp_data;
 	else if (dev == ctx->fsinfo.fs_logdev)
-		return ctx->logdev;
+		return ve->rvp_log;
 	else if (dev == ctx->fsinfo.fs_rtdev)
-		return ctx->rtdev;
+		return ve->rvp_realtime;
 	abort();
 }
 
@@ -285,14 +296,6 @@ xfs_report_verify_errors(
 	return xfs_scan_all_inodes(ctx, xfs_report_verify_inode, &vei);
 }
 
-/* Verify disk blocks with GETFSMAP */
-
-struct xfs_verify_extent {
-	struct read_verify_pool	*readverify;
-	struct bitmap		*d_bad;		/* bytes */
-	struct bitmap		*r_bad;		/* bytes */
-};
-
 /* Report an IO error resulting from read-verify based off getfsmap. */
 static bool
 xfs_check_rmap_error_report(
@@ -393,7 +396,9 @@ xfs_check_rmap(
 	void				*arg)
 {
 	struct xfs_verify_extent	*ve = arg;
-	struct disk			*disk;
+	struct read_verify_pool		*rvp;
+
+	rvp = xfs_dev_to_pool(ctx, ve, map->fmr_device);
 
 	dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64
 			" offset %"PRIu64" len %"PRIu64" flags 0x%x\n",
@@ -420,19 +425,32 @@ xfs_check_rmap(
 	/* XXX: Filter out directory data blocks. */
 
 	/* Schedule the read verify command for (eventual) running. */
-	disk = xfs_dev_to_disk(ctx, map->fmr_device);
-
-	read_verify_schedule_io(ve->readverify, disk, map->fmr_physical,
-			map->fmr_length, ve);
+	read_verify_schedule_io(rvp, map->fmr_physical, map->fmr_length, ve);
 
 out:
 	/* Is this the last extent?  Fire off the read. */
 	if (map->fmr_flags & FMR_OF_LAST)
-		read_verify_force_io(ve->readverify);
+		read_verify_force_io(rvp);
 
 	return true;
 }
 
+/* Wait for read/verify actions to finish, then return # bytes checked. */
+static uint64_t
+clean_pool(
+	struct read_verify_pool	*rvp)
+{
+	uint64_t		ret;
+
+	if (!rvp)
+		return 0;
+
+	read_verify_pool_flush(rvp);
+	ret += read_verify_bytes(rvp);
+	read_verify_pool_destroy(rvp);
+	return ret;
+}
+
 /*
  * Read verify all the file data blocks in a filesystem.  Since XFS doesn't
  * do data checksums, we trust that the underlying storage will pass back
@@ -445,7 +463,7 @@ bool
 xfs_scan_blocks(
 	struct scrub_ctx		*ctx)
 {
-	struct xfs_verify_extent	ve;
+	struct xfs_verify_extent	ve = { NULL };
 	bool				moveon;
 
 	moveon = bitmap_init(&ve.d_bad);
@@ -460,21 +478,43 @@ xfs_scan_blocks(
 		goto out_dbad;
 	}
 
-	ve.readverify = read_verify_pool_init(ctx, ctx->geo.blocksize,
-			xfs_check_rmap_ioerr, disk_heads(ctx->datadev),
+	ve.rvp_data = read_verify_pool_init(ctx, ctx->datadev,
+			ctx->geo.blocksize, xfs_check_rmap_ioerr,
 			scrub_nproc(ctx));
-	if (!ve.readverify) {
+	if (!ve.rvp_data) {
 		moveon = false;
 		str_info(ctx, ctx->mntpoint,
-_("Could not create media verifier."));
+_("Could not create data device media verifier."));
 		goto out_rbad;
 	}
+	if (ctx->logdev) {
+		ve.rvp_log = read_verify_pool_init(ctx, ctx->logdev,
+				ctx->geo.blocksize, xfs_check_rmap_ioerr,
+				scrub_nproc(ctx));
+		if (!ve.rvp_log) {
+			moveon = false;
+			str_info(ctx, ctx->mntpoint,
+	_("Could not create log device media verifier."));
+			goto out_datapool;
+		}
+	}
+	if (ctx->rtdev) {
+		ve.rvp_realtime = read_verify_pool_init(ctx, ctx->rtdev,
+				ctx->geo.blocksize, xfs_check_rmap_ioerr,
+				scrub_nproc(ctx));
+		if (!ve.rvp_realtime) {
+			moveon = false;
+			str_info(ctx, ctx->mntpoint,
+	_("Could not create realtime device media verifier."));
+			goto out_logpool;
+		}
+	}
 	moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &ve);
 	if (!moveon)
-		goto out_pool;
-	read_verify_pool_flush(ve.readverify);
-	ctx->bytes_checked += read_verify_bytes(ve.readverify);
-	read_verify_pool_destroy(ve.readverify);
+		goto out_rtpool;
+	ctx->bytes_checked += clean_pool(ve.rvp_data);
+	ctx->bytes_checked += clean_pool(ve.rvp_log);
+	ctx->bytes_checked += clean_pool(ve.rvp_realtime);
 
 	/* Scan the whole dir tree to see what matches the bad extents. */
 	if (!bitmap_empty(ve.d_bad) || !bitmap_empty(ve.r_bad))
@@ -484,8 +524,14 @@ _("Could not create media verifier."));
 	bitmap_free(&ve.d_bad);
 	return moveon;
 
-out_pool:
-	read_verify_pool_destroy(ve.readverify);
+out_rtpool:
+	if (ve.rvp_realtime)
+		read_verify_pool_destroy(ve.rvp_realtime);
+out_logpool:
+	if (ve.rvp_log)
+		read_verify_pool_destroy(ve.rvp_log);
+out_datapool:
+	read_verify_pool_destroy(ve.rvp_data);
 out_rbad:
 	bitmap_free(&ve.r_bad);
 out_dbad:
diff --git a/scrub/read_verify.c b/scrub/read_verify.c
index b5774736..4a9b91f2 100644
--- a/scrub/read_verify.c
+++ b/scrub/read_verify.c
@@ -50,6 +50,7 @@ struct read_verify_pool {
 	void			*readbuf;	/* read buffer */
 	struct ptcounter	*verified_bytes;
 	struct ptvar		*rvstate;	/* combines read requests */
+	struct disk		*disk;		/* which disk? */
 	read_verify_ioerr_fn_t	ioerr_fn;	/* io error callback */
 	size_t			miniosz;	/* minimum io size, bytes */
 };
@@ -57,19 +58,18 @@ struct read_verify_pool {
 /*
  * Create a thread pool to run read verifiers.
  *
+ * @disk is the disk we want to verify.
  * @miniosz is the minimum size of an IO to expect (in bytes).
  * @ioerr_fn will be called when IO errors occur.
- * @nproc is the maximum number of verify requests that may be sent to a disk
- * at any given time.
  * @submitter_threads is the number of threads that may be sending verify
  * requests at any given time.
  */
 struct read_verify_pool *
 read_verify_pool_init(
 	struct scrub_ctx		*ctx,
+	struct disk			*disk,
 	size_t				miniosz,
 	read_verify_ioerr_fn_t		ioerr_fn,
-	unsigned int			nproc,
 	unsigned int			submitter_threads)
 {
 	struct read_verify_pool		*rvp;
@@ -89,6 +89,7 @@ read_verify_pool_init(
 		goto out_buf;
 	rvp->miniosz = miniosz;
 	rvp->ctx = ctx;
+	rvp->disk = disk;
 	rvp->ioerr_fn = ioerr_fn;
 	rvp->rvstate = ptvar_init(submitter_threads,
 			sizeof(struct read_verify));
@@ -97,7 +98,8 @@ read_verify_pool_init(
 	/* Run in the main thread if we only want one thread. */
 	if (nproc == 1)
 		nproc = 0;
-	ret = workqueue_create(&rvp->wq, (struct xfs_mount *)rvp, nproc);
+	ret = workqueue_create(&rvp->wq, (struct xfs_mount *)rvp,
+			disk_heads(disk));
 	if (ret)
 		goto out_rvstate;
 	return rvp;
@@ -150,17 +152,16 @@ read_verify(
 	rvp = (struct read_verify_pool *)wq->wq_ctx;
 	while (rv->io_length > 0) {
 		len = min(rv->io_length, RVP_IO_MAX_SIZE);
-		dbg_printf("diskverify %d %"PRIu64" %zu\n", rv->io_disk->d_fd,
-				rv->io_start, len);
-		sz = disk_read_verify(rv->io_disk, rvp->readbuf,
+		dbg_printf("diskverify %d %"PRIu64" %zu\n", rvp->disk->d_fd,
 				rv->io_start, len);
+		sz = disk_read_verify(rvp->disk, rvp->readbuf, rv->io_start,
+				len);
 		if (sz < 0) {
 			dbg_printf("IOERR %d %"PRIu64" %zu\n",
-					rv->io_disk->d_fd,
-					rv->io_start, len);
+					rvp->disk->d_fd, rv->io_start, len);
 			/* IO error, so try the next logical block. */
 			len = rvp->miniosz;
-			rvp->ioerr_fn(rvp->ctx, rv->io_disk, rv->io_start, len,
+			rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start, len,
 					errno, rv->io_end_arg);
 		}
 
@@ -184,11 +185,11 @@ read_verify_queue(
 	bool				ret;
 
 	dbg_printf("verify fd %d start %"PRIu64" len %"PRIu64"\n",
-			rv->io_disk->d_fd, rv->io_start, rv->io_length);
+			rvp->disk->d_fd, rv->io_start, rv->io_length);
 
 	tmp = malloc(sizeof(struct read_verify));
 	if (!tmp) {
-		rvp->ioerr_fn(rvp->ctx, rv->io_disk, rv->io_start,
+		rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start,
 				rv->io_length, errno, rv->io_end_arg);
 		return true;
 	}
@@ -212,7 +213,6 @@ _("Could not queue read-verify work."));
 bool
 read_verify_schedule_io(
 	struct read_verify_pool		*rvp,
-	struct disk			*disk,
 	uint64_t			start,
 	uint64_t			length,
 	void				*end_arg)
@@ -231,7 +231,7 @@ read_verify_schedule_io(
 	 * reporting is the same, and the two extents are close,
 	 * we can combine them.
 	 */
-	if (rv->io_length > 0 && disk == rv->io_disk &&
+	if (rv->io_length > 0 &&
 	    end_arg == rv->io_end_arg &&
 	    ((start >= rv->io_start && start <= rv_end + RVP_IO_BATCH_LOCALITY) ||
 	     (rv->io_start >= start &&
@@ -244,7 +244,6 @@ read_verify_schedule_io(
 			return read_verify_queue(rvp, rv);
 
 		/* Stash the new IO. */
-		rv->io_disk = disk;
 		rv->io_start = start;
 		rv->io_length = length;
 		rv->io_end_arg = end_arg;
diff --git a/scrub/read_verify.h b/scrub/read_verify.h
index 1e7fd83f..5fabe5e0 100644
--- a/scrub/read_verify.h
+++ b/scrub/read_verify.h
@@ -8,6 +8,7 @@
 
 struct scrub_ctx;
 struct read_verify_pool;
+struct disk;
 
 /* Function called when an IO error happens. */
 typedef void (*read_verify_ioerr_fn_t)(struct scrub_ctx *ctx,
@@ -15,13 +16,14 @@ typedef void (*read_verify_ioerr_fn_t)(struct scrub_ctx *ctx,
 		int error, void *arg);
 
 struct read_verify_pool *read_verify_pool_init(struct scrub_ctx *ctx,
-		size_t miniosz, read_verify_ioerr_fn_t ioerr_fn,
-		unsigned int nproc, unsigned int submitter_threads);
+		struct disk *disk, size_t miniosz,
+		read_verify_ioerr_fn_t ioerr_fn,
+		unsigned int submitter_threads);
 void read_verify_pool_flush(struct read_verify_pool *rvp);
 void read_verify_pool_destroy(struct read_verify_pool *rvp);
 
-bool read_verify_schedule_io(struct read_verify_pool *rvp, struct disk *disk,
-		uint64_t start, uint64_t length, void *end_arg);
+bool read_verify_schedule_io(struct read_verify_pool *rvp, uint64_t start,
+		uint64_t length, void *end_arg);
 bool read_verify_force_io(struct read_verify_pool *rvp);
 uint64_t read_verify_bytes(struct read_verify_pool *rvp);