Re: [PATCH 02/11] xfs_scrub: improve reporting of file data media errors

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 9/25/19 4:36 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> 
> When we report media errors, we should tell the administrator the file
> offset and length of the bad region, not just the offset of the entire
> file extent record that overlaps a bad region.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> ---
>  libfrog/bitmap.c |   37 +++++++++++++++++++++++++++++++++++++
>  libfrog/bitmap.h |    2 ++
>  scrub/phase6.c   |   52 +++++++++++++++++++++++++++++++++++++++++++++++-----
>  3 files changed, 86 insertions(+), 5 deletions(-)
> 
> 
> diff --git a/libfrog/bitmap.c b/libfrog/bitmap.c
> index a75d085a..6a88ef48 100644
> --- a/libfrog/bitmap.c
> +++ b/libfrog/bitmap.c
> @@ -339,6 +339,43 @@ bitmap_iterate(
>  }
>  #endif
>  
> +/* Iterate the set regions of part of this bitmap. */
> +int
> +bitmap_iterate_range(
> +	struct bitmap		*bmap,
> +	uint64_t		start,
> +	uint64_t		length,
> +	int			(*fn)(uint64_t, uint64_t, void *),
> +	void			*arg)
> +{
> +	struct avl64node	*firstn;
> +	struct avl64node	*lastn;
> +	struct avl64node	*pos;
> +	struct avl64node	*n;
> +	struct avl64node	*l;
> +	struct bitmap_node	*ext;
> +	int			ret = 0;
> +
> +	pthread_mutex_lock(&bmap->bt_lock);
> +
> +	avl64_findranges(bmap->bt_tree, start, start + length, &firstn,
> +			&lastn);
> +
> +	if (firstn == NULL && lastn == NULL)
> +		goto out;
> +
> +	avl_for_each_range_safe(pos, n, l, firstn, lastn) {
> +		ext = container_of(pos, struct bitmap_node, btn_node);
> +		ret = fn(ext->btn_start, ext->btn_length, arg);
> +		if (ret)
> +			break;
> +	}
> +
> +out:
> +	pthread_mutex_unlock(&bmap->bt_lock);
> +	return ret;
> +}
> +
>  /* Do any bitmap extents overlap the given one?  (locked) */
>  static bool
>  __bitmap_test(
> diff --git a/libfrog/bitmap.h b/libfrog/bitmap.h
> index 759386a8..043b77ee 100644
> --- a/libfrog/bitmap.h
> +++ b/libfrog/bitmap.h
> @@ -16,6 +16,8 @@ void bitmap_free(struct bitmap **bmap);
>  int bitmap_set(struct bitmap *bmap, uint64_t start, uint64_t length);
>  int bitmap_iterate(struct bitmap *bmap, int (*fn)(uint64_t, uint64_t, void *),
>  		void *arg);
> +int bitmap_iterate_range(struct bitmap *bmap, uint64_t start, uint64_t length,
> +		int (*fn)(uint64_t, uint64_t, void *), void *arg);
>  bool bitmap_test(struct bitmap *bmap, uint64_t start,
>  		uint64_t len);
>  bool bitmap_empty(struct bitmap *bmap);
> diff --git a/scrub/phase6.c b/scrub/phase6.c
> index 1edd98af..a16ad114 100644
> --- a/scrub/phase6.c
> +++ b/scrub/phase6.c
> @@ -111,6 +111,41 @@ xfs_decode_special_owner(
>  
>  /* Routines to translate bad physical extents into file paths and offsets. */
>  
> +struct badfile_report {
> +	struct scrub_ctx	*ctx;
> +	const char		*descr;
> +	struct xfs_bmap		*bmap;
> +};
> +
> +/* Report on bad extents found during a media scan. */
> +static int
> +report_badfile(
> +	uint64_t		start,
> +	uint64_t		length,
> +	void			*arg)
> +{
> +	struct badfile_report	*br = arg;
> +	unsigned long long	bad_offset;
> +	unsigned long long	bad_length;
> +
> +	/* Clamp the bad region to the file mapping. */
> +	if (start < br->bmap->bm_physical) {
> +		length -= br->bmap->bm_physical - start;
> +		start = br->bmap->bm_physical;
> +	}
> +	length = min(length, br->bmap->bm_length);
> +
> +	/* Figure out how far into the bmap is the bad mapping and report it. */
> +	bad_offset = start - br->bmap->bm_physical;
> +	bad_length = min(start + length,
> +			 br->bmap->bm_physical + br->bmap->bm_length) - start;
> +
> +	str_error(br->ctx, br->descr,
> +_("media error at data offset %llu length %llu."),
> +			br->bmap->bm_offset + bad_offset, bad_length);
> +	return 0;
> +}
> +
>  /* Report if this extent overlaps a bad region. */
>  static bool
>  report_data_loss(
> @@ -122,8 +157,14 @@ report_data_loss(
>  	struct xfs_bmap			*bmap,
>  	void				*arg)
>  {
> +	struct badfile_report		br = {
> +		.ctx			= ctx,
> +		.descr			= descr,
> +		.bmap			= bmap,
> +	};
>  	struct media_verify_state	*vs = arg;
>  	struct bitmap			*bmp;
> +	int				ret;
>  
>  	/* Only report errors for real extents. */
>  	if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
> @@ -134,11 +175,12 @@ report_data_loss(
>  	else
>  		bmp = vs->d_bad;
>  
> -	if (!bitmap_test(bmp, bmap->bm_physical, bmap->bm_length))
> -		return true;
> -
> -	str_error(ctx, descr,
> -_("offset %llu failed read verification."), bmap->bm_offset);
> +	ret = bitmap_iterate_range(bmp, bmap->bm_physical, bmap->bm_length,
> +			report_badfile, &br);
> +	if (ret) {
> +		str_liberror(ctx, ret, descr);
> +		return false;

So related to the prior question; why does changing the way we report a
media error in a file change the flow with a "return false" here?

> +	}
>  	return true;
>  }
>  
> 



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux