On Mon, Oct 21, 2019 at 11:33:59AM -0500, Eric Sandeen wrote: > On 9/25/19 4:36 PM, Darrick J. Wong wrote: > > From: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > > > > When we report media errors, we should tell the administrator the file > > offset and length of the bad region, not just the offset of the entire > > file extent record that overlaps a bad region. > > > > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > > --- > > libfrog/bitmap.c | 37 +++++++++++++++++++++++++++++++++++++ > > libfrog/bitmap.h | 2 ++ > > scrub/phase6.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++----- > > 3 files changed, 86 insertions(+), 5 deletions(-) > > > > > > diff --git a/libfrog/bitmap.c b/libfrog/bitmap.c > > index a75d085a..6a88ef48 100644 > > --- a/libfrog/bitmap.c > > +++ b/libfrog/bitmap.c > > @@ -339,6 +339,43 @@ bitmap_iterate( > > } > > #endif > > > > +/* Iterate the set regions of part of this bitmap. */ > > +int > > +bitmap_iterate_range( > > + struct bitmap *bmap, > > + uint64_t start, > > + uint64_t length, > > + int (*fn)(uint64_t, uint64_t, void *), > > + void *arg) > > +{ > > + struct avl64node *firstn; > > + struct avl64node *lastn; > > + struct avl64node *pos; > > + struct avl64node *n; > > + struct avl64node *l; > > + struct bitmap_node *ext; > > + int ret = 0; > > + > > + pthread_mutex_lock(&bmap->bt_lock); > > + > > + avl64_findranges(bmap->bt_tree, start, start + length, &firstn, > > + &lastn); > > + > > + if (firstn == NULL && lastn == NULL) > > + goto out; > > + > > + avl_for_each_range_safe(pos, n, l, firstn, lastn) { > > + ext = container_of(pos, struct bitmap_node, btn_node); > > + ret = fn(ext->btn_start, ext->btn_length, arg); > > + if (ret) > > + break; > > + } > > + > > +out: > > + pthread_mutex_unlock(&bmap->bt_lock); > > + return ret; > > +} > > + > > /* Do any bitmap extents overlap the given one? (locked) */ > > static bool > > __bitmap_test( > > diff --git a/libfrog/bitmap.h b/libfrog/bitmap.h > > index 759386a8..043b77ee 100644 > > --- a/libfrog/bitmap.h > > +++ b/libfrog/bitmap.h > > @@ -16,6 +16,8 @@ void bitmap_free(struct bitmap **bmap); > > int bitmap_set(struct bitmap *bmap, uint64_t start, uint64_t length); > > int bitmap_iterate(struct bitmap *bmap, int (*fn)(uint64_t, uint64_t, void *), > > void *arg); > > +int bitmap_iterate_range(struct bitmap *bmap, uint64_t start, uint64_t length, > > + int (*fn)(uint64_t, uint64_t, void *), void *arg); > > bool bitmap_test(struct bitmap *bmap, uint64_t start, > > uint64_t len); > > bool bitmap_empty(struct bitmap *bmap); > > diff --git a/scrub/phase6.c b/scrub/phase6.c > > index 1edd98af..a16ad114 100644 > > --- a/scrub/phase6.c > > +++ b/scrub/phase6.c > > @@ -111,6 +111,41 @@ xfs_decode_special_owner( > > > > /* Routines to translate bad physical extents into file paths and offsets. */ > > > > +struct badfile_report { > > + struct scrub_ctx *ctx; > > + const char *descr; > > + struct xfs_bmap *bmap; > > +}; > > + > > +/* Report on bad extents found during a media scan. */ > > +static int > > +report_badfile( > > + uint64_t start, > > + uint64_t length, > > + void *arg) > > +{ > > + struct badfile_report *br = arg; > > + unsigned long long bad_offset; > > + unsigned long long bad_length; > > + > > + /* Clamp the bad region to the file mapping. */ > > + if (start < br->bmap->bm_physical) { > > + length -= br->bmap->bm_physical - start; > > + start = br->bmap->bm_physical; > > + } > > + length = min(length, br->bmap->bm_length); > > + > > + /* Figure out how far into the bmap is the bad mapping and report it. */ > > + bad_offset = start - br->bmap->bm_physical; > > + bad_length = min(start + length, > > + br->bmap->bm_physical + br->bmap->bm_length) - start; > > + > > + str_error(br->ctx, br->descr, > > +_("media error at data offset %llu length %llu."), > > + br->bmap->bm_offset + bad_offset, bad_length); > > + return 0; > > +} > > + > > /* Report if this extent overlaps a bad region. */ > > static bool > > report_data_loss( > > @@ -122,8 +157,14 @@ report_data_loss( > > struct xfs_bmap *bmap, > > void *arg) > > { > > + struct badfile_report br = { > > + .ctx = ctx, > > + .descr = descr, > > + .bmap = bmap, > > + }; > > struct media_verify_state *vs = arg; > > struct bitmap *bmp; > > + int ret; > > > > /* Only report errors for real extents. */ > > if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) > > @@ -134,11 +175,12 @@ report_data_loss( > > else > > bmp = vs->d_bad; > > > > - if (!bitmap_test(bmp, bmap->bm_physical, bmap->bm_length)) > > - return true; > > - > > - str_error(ctx, descr, > > -_("offset %llu failed read verification."), bmap->bm_offset); > > + ret = bitmap_iterate_range(bmp, bmap->bm_physical, bmap->bm_length, > > + report_badfile, &br); > > + if (ret) { > > + str_liberror(ctx, ret, descr); > > + return false; > > So related to the prior question; why does changing the way we report a > media error in a file change the flow with a "return false" here? The bitmap iteration itself failed (since report_badfile never returns false), which means it hit a runtime error and the program should exit. (Granted, bitmap iteration never fails, so this is merely defensive programming.) --D > > + } > > return true; > > } > > > >