On 3/20/19 3:06 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
>
> Simplify the read/verify pool code further by creating one pool per
> disk.  This enables us to tailor the concurrency levels of each disk to
> that specific disk so that if we have a mixed hdd/ssd environment we
> don't flood the hdd with a lot of requests.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>

Reviewed-by: Eric Sandeen <sandeen@xxxxxxxxxx>

> ---
> v2: fix uninitialized variable error pointed out by Eric Sandeen
> ---
>  scrub/phase6.c      | 110 ++++++++++++++++++++++++++++++++++++---------------
>  scrub/read_verify.c |  29 ++++++-------
>  scrub/read_verify.h |  10 +++--
>  3 files changed, 98 insertions(+), 51 deletions(-)
>
> diff --git a/scrub/phase6.c b/scrub/phase6.c
> index fe121769..5e2b9ee3 100644
> --- a/scrub/phase6.c
> +++ b/scrub/phase6.c
> @@ -33,18 +33,29 @@
>   * and report the paths of the now corrupt files.
>   */
>
> +/* Verify disk blocks with GETFSMAP */
> +
> +struct xfs_verify_extent {
> +	struct read_verify_pool	*rvp_data;
> +	struct read_verify_pool	*rvp_log;
> +	struct read_verify_pool	*rvp_realtime;
> +	struct bitmap		*d_bad;		/* bytes */
> +	struct bitmap		*r_bad;		/* bytes */
> +};
> +
>  /* Find the fd for a given device identifier. */
> -static struct disk *
> -xfs_dev_to_disk(
> -	struct scrub_ctx	*ctx,
> -	dev_t			dev)
> +static struct read_verify_pool *
> +xfs_dev_to_pool(
> +	struct scrub_ctx		*ctx,
> +	struct xfs_verify_extent	*ve,
> +	dev_t				dev)
>  {
>  	if (dev == ctx->fsinfo.fs_datadev)
> -		return ctx->datadev;
> +		return ve->rvp_data;
>  	else if (dev == ctx->fsinfo.fs_logdev)
> -		return ctx->logdev;
> +		return ve->rvp_log;
>  	else if (dev == ctx->fsinfo.fs_rtdev)
> -		return ctx->rtdev;
> +		return ve->rvp_realtime;
>  	abort();
>  }
>
> @@ -285,14 +296,6 @@ xfs_report_verify_errors(
>  	return xfs_scan_all_inodes(ctx, xfs_report_verify_inode, &vei);
>  }
>
> -/* Verify disk blocks with GETFSMAP */
> -
> -struct xfs_verify_extent {
> -	struct read_verify_pool	*readverify;
> -	struct bitmap		*d_bad;		/* bytes */
> -	struct bitmap		*r_bad;		/* bytes */
> -};
> -
>  /* Report an IO error resulting from read-verify based off getfsmap. */
>  static bool
>  xfs_check_rmap_error_report(
> @@ -393,7 +396,9 @@ xfs_check_rmap(
>  	void				*arg)
>  {
>  	struct xfs_verify_extent	*ve = arg;
> -	struct disk			*disk;
> +	struct read_verify_pool		*rvp;
> +
> +	rvp = xfs_dev_to_pool(ctx, ve, map->fmr_device);
>
>  	dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64
>  			" offset %"PRIu64" len %"PRIu64" flags 0x%x\n",
> @@ -420,19 +425,32 @@ xfs_check_rmap(
>  	/* XXX: Filter out directory data blocks. */
>
>  	/* Schedule the read verify command for (eventual) running. */
> -	disk = xfs_dev_to_disk(ctx, map->fmr_device);
> -
> -	read_verify_schedule_io(ve->readverify, disk, map->fmr_physical,
> -			map->fmr_length, ve);
> +	read_verify_schedule_io(rvp, map->fmr_physical, map->fmr_length, ve);
>
>  out:
>  	/* Is this the last extent?  Fire off the read. */
>  	if (map->fmr_flags & FMR_OF_LAST)
> -		read_verify_force_io(ve->readverify);
> +		read_verify_force_io(rvp);
>
>  	return true;
>  }
>
> +/* Wait for read/verify actions to finish, then return # bytes checked. */
> +static uint64_t
> +clean_pool(
> +	struct read_verify_pool	*rvp)
> +{
> +	uint64_t		ret;
> +
> +	if (!rvp)
> +		return 0;
> +
> +	read_verify_pool_flush(rvp);
> +	ret = read_verify_bytes(rvp);
> +	read_verify_pool_destroy(rvp);
> +	return ret;
> +}
> +
>  /*
>   * Read verify all the file data blocks in a filesystem.  Since XFS doesn't
>   * do data checksums, we trust that the underlying storage will pass back
> @@ -445,7 +463,7 @@ bool
>  xfs_scan_blocks(
>  	struct scrub_ctx		*ctx)
>  {
> -	struct xfs_verify_extent	ve;
> +	struct xfs_verify_extent	ve = { NULL };
>  	bool				moveon;
>
>  	moveon = bitmap_init(&ve.d_bad);
> @@ -460,21 +478,43 @@ xfs_scan_blocks(
>  		goto out_dbad;
>  	}
>
> -	ve.readverify = read_verify_pool_init(ctx, ctx->geo.blocksize,
> -			xfs_check_rmap_ioerr, disk_heads(ctx->datadev),
> +	ve.rvp_data = read_verify_pool_init(ctx, ctx->datadev,
> +			ctx->geo.blocksize, xfs_check_rmap_ioerr,
>  			scrub_nproc(ctx));
> -	if (!ve.readverify) {
> +	if (!ve.rvp_data) {
>  		moveon = false;
>  		str_info(ctx, ctx->mntpoint,
> -_("Could not create media verifier."));
> +_("Could not create data device media verifier."));
>  		goto out_rbad;
>  	}
> +	if (ctx->logdev) {
> +		ve.rvp_log = read_verify_pool_init(ctx, ctx->logdev,
> +				ctx->geo.blocksize, xfs_check_rmap_ioerr,
> +				scrub_nproc(ctx));
> +		if (!ve.rvp_log) {
> +			moveon = false;
> +			str_info(ctx, ctx->mntpoint,
> +_("Could not create log device media verifier."));
> +			goto out_datapool;
> +		}
> +	}
> +	if (ctx->rtdev) {
> +		ve.rvp_realtime = read_verify_pool_init(ctx, ctx->rtdev,
> +				ctx->geo.blocksize, xfs_check_rmap_ioerr,
> +				scrub_nproc(ctx));
> +		if (!ve.rvp_realtime) {
> +			moveon = false;
> +			str_info(ctx, ctx->mntpoint,
> +_("Could not create realtime device media verifier."));
> +			goto out_logpool;
> +		}
> +	}
>  	moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &ve);
>  	if (!moveon)
> -		goto out_pool;
> -	read_verify_pool_flush(ve.readverify);
> -	ctx->bytes_checked += read_verify_bytes(ve.readverify);
> -	read_verify_pool_destroy(ve.readverify);
> +		goto out_rtpool;
> +	ctx->bytes_checked += clean_pool(ve.rvp_data);
> +	ctx->bytes_checked += clean_pool(ve.rvp_log);
> +	ctx->bytes_checked += clean_pool(ve.rvp_realtime);
>
>  	/* Scan the whole dir tree to see what matches the bad extents. */
>  	if (!bitmap_empty(ve.d_bad) || !bitmap_empty(ve.r_bad))
> @@ -484,8 +524,14 @@ _("Could not create media verifier."));
>  	bitmap_free(&ve.d_bad);
>  	return moveon;
>
> -out_pool:
> -	read_verify_pool_destroy(ve.readverify);
> +out_rtpool:
> +	if (ve.rvp_realtime)
> +		read_verify_pool_destroy(ve.rvp_realtime);
> +out_logpool:
> +	if (ve.rvp_log)
> +		read_verify_pool_destroy(ve.rvp_log);
> +out_datapool:
> +	read_verify_pool_destroy(ve.rvp_data);
>  out_rbad:
>  	bitmap_free(&ve.r_bad);
>  out_dbad:
> diff --git a/scrub/read_verify.c b/scrub/read_verify.c
> index b5774736..4a9b91f2 100644
> --- a/scrub/read_verify.c
> +++ b/scrub/read_verify.c
> @@ -50,6 +50,7 @@ struct read_verify_pool {
>  	void			*readbuf;	/* read buffer */
>  	struct ptcounter	*verified_bytes;
>  	struct ptvar		*rvstate;	/* combines read requests */
> +	struct disk		*disk;		/* which disk? */
>  	read_verify_ioerr_fn_t	ioerr_fn;	/* io error callback */
>  	size_t			miniosz;	/* minimum io size, bytes */
>  };
> @@ -57,19 +58,18 @@ struct read_verify_pool {
>  /*
>   * Create a thread pool to run read verifiers.
>   *
> + * @disk is the disk we want to verify.
>   * @miniosz is the minimum size of an IO to expect (in bytes).
>   * @ioerr_fn will be called when IO errors occur.
> - * @nproc is the maximum number of verify requests that may be sent to a disk
> - * at any given time.
>   * @submitter_threads is the number of threads that may be sending verify
>   * requests at any given time.
>   */
>  struct read_verify_pool *
>  read_verify_pool_init(
>  	struct scrub_ctx		*ctx,
> +	struct disk			*disk,
>  	size_t				miniosz,
>  	read_verify_ioerr_fn_t		ioerr_fn,
> -	unsigned int			nproc,
>  	unsigned int			submitter_threads)
>  {
>  	struct read_verify_pool		*rvp;
> @@ -89,6 +89,7 @@ read_verify_pool_init(
>  		goto out_buf;
>  	rvp->miniosz = miniosz;
>  	rvp->ctx = ctx;
> +	rvp->disk = disk;
>  	rvp->ioerr_fn = ioerr_fn;
>  	rvp->rvstate = ptvar_init(submitter_threads,
>  			sizeof(struct read_verify));
> @@ -97,7 +98,8 @@ read_verify_pool_init(
>  	/* Run in the main thread if we only want one thread. */
>  	if (nproc == 1)
>  		nproc = 0;
> -	ret = workqueue_create(&rvp->wq, (struct xfs_mount *)rvp, nproc);
> +	ret = workqueue_create(&rvp->wq, (struct xfs_mount *)rvp,
> +			disk_heads(disk));
>  	if (ret)
>  		goto out_rvstate;
>  	return rvp;
> @@ -150,17 +152,16 @@ read_verify(
>  	rvp = (struct read_verify_pool *)wq->wq_ctx;
>  	while (rv->io_length > 0) {
>  		len = min(rv->io_length, RVP_IO_MAX_SIZE);
> -		dbg_printf("diskverify %d %"PRIu64" %zu\n", rv->io_disk->d_fd,
> -				rv->io_start, len);
> -		sz = disk_read_verify(rv->io_disk, rvp->readbuf,
> +		dbg_printf("diskverify %d %"PRIu64" %zu\n", rvp->disk->d_fd,
>  				rv->io_start, len);
> +		sz = disk_read_verify(rvp->disk, rvp->readbuf, rv->io_start,
> +				len);
>  		if (sz < 0) {
>  			dbg_printf("IOERR %d %"PRIu64" %zu\n",
> -					rv->io_disk->d_fd,
> -					rv->io_start, len);
> +					rvp->disk->d_fd, rv->io_start, len);
>  			/* IO error, so try the next logical block. */
>  			len = rvp->miniosz;
> -			rvp->ioerr_fn(rvp->ctx, rv->io_disk, rv->io_start, len,
> +			rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start, len,
>  					errno, rv->io_end_arg);
>  		}
>
> @@ -184,11 +185,11 @@ read_verify_queue(
>  	bool			ret;
>
>  	dbg_printf("verify fd %d start %"PRIu64" len %"PRIu64"\n",
> -			rv->io_disk->d_fd, rv->io_start, rv->io_length);
> +			rvp->disk->d_fd, rv->io_start, rv->io_length);
>
>  	tmp = malloc(sizeof(struct read_verify));
>  	if (!tmp) {
> -		rvp->ioerr_fn(rvp->ctx, rv->io_disk, rv->io_start,
> +		rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start,
>  				rv->io_length, errno, rv->io_end_arg);
>  		return true;
>  	}
> @@ -212,7 +213,6 @@ _("Could not queue read-verify work."));
>  bool
>  read_verify_schedule_io(
>  	struct read_verify_pool		*rvp,
> -	struct disk			*disk,
>  	uint64_t			start,
>  	uint64_t			length,
>  	void				*end_arg)
> @@ -231,7 +231,7 @@ read_verify_schedule_io(
>  	 * reporting is the same, and the two extents are close,
>  	 * we can combine them.
>  	 */
> -	if (rv->io_length > 0 && disk == rv->io_disk &&
> +	if (rv->io_length > 0 &&
>  	    end_arg == rv->io_end_arg &&
>  	    ((start >= rv->io_start && start <= rv_end + RVP_IO_BATCH_LOCALITY) ||
>  	     (rv->io_start >= start &&
> @@ -244,7 +244,6 @@ read_verify_schedule_io(
>  		return read_verify_queue(rvp, rv);
>
>  	/* Stash the new IO. */
> -	rv->io_disk = disk;
>  	rv->io_start = start;
>  	rv->io_length = length;
>  	rv->io_end_arg = end_arg;
> diff --git a/scrub/read_verify.h b/scrub/read_verify.h
> index 1e7fd83f..5fabe5e0 100644
> --- a/scrub/read_verify.h
> +++ b/scrub/read_verify.h
> @@ -8,6 +8,7 @@
>
>  struct scrub_ctx;
>  struct read_verify_pool;
> +struct disk;
>
>  /* Function called when an IO error happens. */
>  typedef void (*read_verify_ioerr_fn_t)(struct scrub_ctx *ctx,
> @@ -15,13 +16,14 @@ typedef void (*read_verify_ioerr_fn_t)(struct scrub_ctx *ctx,
>  		int error, void *arg);
>
>  struct read_verify_pool *read_verify_pool_init(struct scrub_ctx *ctx,
> -		size_t miniosz, read_verify_ioerr_fn_t ioerr_fn,
> -		unsigned int nproc, unsigned int submitter_threads);
> +		struct disk *disk, size_t miniosz,
> +		read_verify_ioerr_fn_t ioerr_fn,
> +		unsigned int submitter_threads);
>  void read_verify_pool_flush(struct read_verify_pool *rvp);
>  void read_verify_pool_destroy(struct read_verify_pool *rvp);
>
> -bool read_verify_schedule_io(struct read_verify_pool *rvp, struct disk *disk,
> -		uint64_t start, uint64_t length, void *end_arg);
> +bool read_verify_schedule_io(struct read_verify_pool *rvp, uint64_t start,
> +		uint64_t length, void *end_arg);
>  bool read_verify_force_io(struct read_verify_pool *rvp);
>  uint64_t read_verify_bytes(struct read_verify_pool *rvp);
>
>
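For readers skimming the thread, here is a minimal caller-side sketch of the
reworked API, assuming only the signatures visible in this patch:
read_verify_pool_init() now takes the struct disk and drops the nproc argument
(the queue depth comes from disk_heads() inside the pool), and
read_verify_schedule_io() no longer takes a disk.  The verify_one_device()
wrapper, its ioerr_fn argument, and the extent values are hypothetical;
xfs_scan_blocks() above is the real caller.

/*
 * Hypothetical helper, not part of the patch: drive one per-disk pool
 * using only the post-patch read_verify_* signatures.
 */
static bool
verify_one_device(
	struct scrub_ctx	*ctx,
	struct disk		*disk,
	read_verify_ioerr_fn_t	ioerr_fn)
{
	struct read_verify_pool	*rvp;

	/* Pool is bound to this disk; concurrency comes from disk_heads(). */
	rvp = read_verify_pool_init(ctx, disk, ctx->geo.blocksize, ioerr_fn,
			scrub_nproc(ctx));
	if (!rvp)
		return false;

	/* Queue one made-up extent; there is no struct disk argument now. */
	read_verify_schedule_io(rvp, 0, 1048576, NULL);
	read_verify_force_io(rvp);

	/* Same teardown sequence that clean_pool() performs in the patch. */
	read_verify_pool_flush(rvp);
	ctx->bytes_checked += read_verify_bytes(rvp);
	read_verify_pool_destroy(rvp);
	return true;
}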