From: Darrick J. Wong <djwong@xxxxxxxxxx> While doing some chaos testing on the xfs_scrub read verify code, I noticed that if the device under a live filesystem gets resized while scrub is running a media scan, reads will start returning 0 bytes. This causes read_verify() to spin in an infinite loop instead of erroring out like it should. Fix this by treating a zero-byte read as a possible end of the device: read_verify() now passes the zero-length result to the ioerr callback and breaks out of its loop, remember_ioerr() records a zero-length error as a truncation of the data, log, or realtime device, and report_all_media_errors() reports each truncated device as corruption. Cc: <linux-xfs@xxxxxxxxxxxxxxx> # v5.3.0 Fixes: 27464242956fac ("xfs_scrub: fix read verify disk error handling strategy") Signed-off-by: "Darrick J. Wong" <djwong@xxxxxxxxxx> --- scrub/phase6.c | 22 ++++++++++++++++++++++ scrub/read_verify.c | 8 ++++++++ 2 files changed, 30 insertions(+) diff --git a/scrub/phase6.c b/scrub/phase6.c index a61853019e290c..54d21820a722a6 100644 --- a/scrub/phase6.c +++ b/scrub/phase6.c @@ -44,6 +44,9 @@ struct media_verify_state { struct read_verify_pool *rvp_realtime; struct bitmap *d_bad; /* bytes */ struct bitmap *r_bad; /* bytes */ + bool d_trunc:1; + bool r_trunc:1; + bool l_trunc:1; }; /* Find the fd for a given device identifier. 
*/ @@ -544,6 +547,13 @@ report_all_media_errors( { int ret; + if (vs->d_trunc) + str_corrupt(ctx, ctx->mntpoint, _("data device truncated")); + if (vs->l_trunc) + str_corrupt(ctx, ctx->mntpoint, _("log device truncated")); + if (vs->r_trunc) + str_corrupt(ctx, ctx->mntpoint, _("rt device truncated")); + ret = report_disk_ioerrs(ctx, ctx->datadev, vs); if (ret) { str_liberror(ctx, ret, _("walking datadev io errors")); @@ -663,6 +673,18 @@ remember_ioerr( struct bitmap *tree; int ret; + if (!length) { + dev_t dev = disk_to_dev(ctx, disk); + + if (dev == ctx->fsinfo.fs_datadev) + vs->d_trunc = true; + else if (dev == ctx->fsinfo.fs_rtdev) + vs->r_trunc = true; + else if (dev == ctx->fsinfo.fs_logdev) + vs->l_trunc = true; + return; + } + tree = bitmap_for_disk(ctx, disk, vs); if (!tree) { str_liberror(ctx, ENOENT, _("finding bad block bitmap")); diff --git a/scrub/read_verify.c b/scrub/read_verify.c index 52348274be2c25..1219efe2590182 100644 --- a/scrub/read_verify.c +++ b/scrub/read_verify.c @@ -245,6 +245,14 @@ read_verify( read_error); rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start, sz, read_error, rv->io_end_arg); + } else if (sz == 0) { + /* No bytes at all? Did we hit the end of the disk? */ + dbg_printf("EOF %d @ %"PRIu64" %zu err %d\n", + rvp->disk->d_fd, rv->io_start, sz, + read_error); + rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start, sz, + read_error, rv->io_end_arg); + break; } else if (sz < len) { /* * A short direct read suggests that we might have hit