On 9/25/19 4:36 PM, Darrick J. Wong wrote: > From: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > > When we report media errors, we should tell the administrator the file > offset and length of the bad region, not just the offset of the entire > file extent record that overlaps a bad region. > > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > --- > libfrog/bitmap.c | 37 +++++++++++++++++++++++++++++++++++++ > libfrog/bitmap.h | 2 ++ > scrub/phase6.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++----- > 3 files changed, 86 insertions(+), 5 deletions(-) > > > diff --git a/libfrog/bitmap.c b/libfrog/bitmap.c > index a75d085a..6a88ef48 100644 > --- a/libfrog/bitmap.c > +++ b/libfrog/bitmap.c > @@ -339,6 +339,43 @@ bitmap_iterate( > } > #endif > > +/* Iterate the set regions of part of this bitmap. */ > +int > +bitmap_iterate_range( > + struct bitmap *bmap, > + uint64_t start, > + uint64_t length, > + int (*fn)(uint64_t, uint64_t, void *), > + void *arg) > +{ > + struct avl64node *firstn; > + struct avl64node *lastn; > + struct avl64node *pos; > + struct avl64node *n; > + struct avl64node *l; > + struct bitmap_node *ext; > + int ret = 0; > + > + pthread_mutex_lock(&bmap->bt_lock); > + > + avl64_findranges(bmap->bt_tree, start, start + length, &firstn, > + &lastn); > + > + if (firstn == NULL && lastn == NULL) > + goto out; > + > + avl_for_each_range_safe(pos, n, l, firstn, lastn) { > + ext = container_of(pos, struct bitmap_node, btn_node); > + ret = fn(ext->btn_start, ext->btn_length, arg); > + if (ret) > + break; > + } > + > +out: > + pthread_mutex_unlock(&bmap->bt_lock); > + return ret; > +} > + > /* Do any bitmap extents overlap the given one? (locked) */ > static bool > __bitmap_test( > diff --git a/libfrog/bitmap.h b/libfrog/bitmap.h > index 759386a8..043b77ee 100644 > --- a/libfrog/bitmap.h > +++ b/libfrog/bitmap.h > @@ -16,6 +16,8 @@ void bitmap_free(struct bitmap **bmap); > int bitmap_set(struct bitmap *bmap, uint64_t start, uint64_t length); > int bitmap_iterate(struct bitmap *bmap, int (*fn)(uint64_t, uint64_t, void *), > void *arg); > +int bitmap_iterate_range(struct bitmap *bmap, uint64_t start, uint64_t length, > + int (*fn)(uint64_t, uint64_t, void *), void *arg); > bool bitmap_test(struct bitmap *bmap, uint64_t start, > uint64_t len); > bool bitmap_empty(struct bitmap *bmap); > diff --git a/scrub/phase6.c b/scrub/phase6.c > index 1edd98af..a16ad114 100644 > --- a/scrub/phase6.c > +++ b/scrub/phase6.c > @@ -111,6 +111,41 @@ xfs_decode_special_owner( > > /* Routines to translate bad physical extents into file paths and offsets. */ > > +struct badfile_report { > + struct scrub_ctx *ctx; > + const char *descr; > + struct xfs_bmap *bmap; > +}; > + > +/* Report on bad extents found during a media scan. */ > +static int > +report_badfile( > + uint64_t start, > + uint64_t length, > + void *arg) > +{ > + struct badfile_report *br = arg; > + unsigned long long bad_offset; > + unsigned long long bad_length; > + > + /* Clamp the bad region to the file mapping. */ > + if (start < br->bmap->bm_physical) { > + length -= br->bmap->bm_physical - start; > + start = br->bmap->bm_physical; > + } > + length = min(length, br->bmap->bm_length); > + > + /* Figure out how far into the bmap is the bad mapping and report it. */ > + bad_offset = start - br->bmap->bm_physical; > + bad_length = min(start + length, > + br->bmap->bm_physical + br->bmap->bm_length) - start; > + > + str_error(br->ctx, br->descr, > +_("media error at data offset %llu length %llu."), > + br->bmap->bm_offset + bad_offset, bad_length); > + return 0; > +} > + > /* Report if this extent overlaps a bad region. */ > static bool > report_data_loss( > @@ -122,8 +157,14 @@ report_data_loss( > struct xfs_bmap *bmap, > void *arg) > { > + struct badfile_report br = { > + .ctx = ctx, > + .descr = descr, > + .bmap = bmap, > + }; > struct media_verify_state *vs = arg; > struct bitmap *bmp; > + int ret; > > /* Only report errors for real extents. */ > if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) > @@ -134,11 +175,12 @@ report_data_loss( > else > bmp = vs->d_bad; > > - if (!bitmap_test(bmp, bmap->bm_physical, bmap->bm_length)) > - return true; > - > - str_error(ctx, descr, > -_("offset %llu failed read verification."), bmap->bm_offset); > + ret = bitmap_iterate_range(bmp, bmap->bm_physical, bmap->bm_length, > + report_badfile, &br); > + if (ret) { > + str_liberror(ctx, ret, descr); > + return false; So related to the prior question; why does changing the way we report a media error in a file change the flow with a "return false" here? > + } > return true; > } > >