Re: [PATCH 06/11] xfs_scrub: reduce fsmap activity for media errors

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 9/25/19 4:36 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> 
> Right now we rather foolishly query the fsmap data for every single
> media error that we find.  This is a silly waste of time since we
> have yet to combine adjacent bad blocks into bad extents, so move the
> rmap query until after we've constructed the bad block bitmap data.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> ---
>  libfrog/bitmap.c |   10 +---
>  scrub/phase6.c   |  148 ++++++++++++++++++++++++++++++++++++++----------------
>  2 files changed, 108 insertions(+), 50 deletions(-)
> 
> 
> diff --git a/libfrog/bitmap.c b/libfrog/bitmap.c
> index 6a88ef48..5daa1081 100644
> --- a/libfrog/bitmap.c
> +++ b/libfrog/bitmap.c
> @@ -314,7 +314,6 @@ bitmap_clear(
>  }
>  #endif
>  
> -#ifdef DEBUG
>  /* Iterate the set regions of this bitmap. */
>  int
>  bitmap_iterate(
> @@ -324,20 +323,19 @@ bitmap_iterate(
>  {
>  	struct avl64node	*node;
>  	struct bitmap_node	*ext;
> -	int			error = 0;
> +	int			ret;
>  
>  	pthread_mutex_lock(&bmap->bt_lock);
>  	avl_for_each(bmap->bt_tree, node) {
>  		ext = container_of(node, struct bitmap_node, btn_node);
> -		error = fn(ext->btn_start, ext->btn_length, arg);
> -		if (error)
> +		ret = fn(ext->btn_start, ext->btn_length, arg);
> +		if (ret)
>  			break;
>  	}
>  	pthread_mutex_unlock(&bmap->bt_lock);
>  
> -	return error;
> +	return ret;
>  }
> -#endif
>  
>  /* Iterate the set regions of part of this bitmap. */
>  int
> diff --git a/scrub/phase6.c b/scrub/phase6.c
> index ec821373..378ea0fb 100644
> --- a/scrub/phase6.c
> +++ b/scrub/phase6.c
> @@ -341,27 +341,9 @@ xfs_report_verify_dirent(
>  	return moveon;
>  }
>  
> -/* Given bad extent lists for the data & rtdev, find bad files. */
> -static bool
> -xfs_report_verify_errors(
> -	struct scrub_ctx		*ctx,
> -	struct media_verify_state	*vs)
> -{
> -	bool				moveon;
> -
> -	/* Scan the directory tree to get file paths. */
> -	moveon = scan_fs_tree(ctx, xfs_report_verify_dir,
> -			xfs_report_verify_dirent, vs);
> -	if (!moveon)
> -		return false;
> -
> -	/* Scan for unlinked files. */
> -	return xfs_scan_all_inodes(ctx, xfs_report_verify_inode, vs);
> -}
> -
>  /* Report an IO error resulting from read-verify based off getfsmap. */
>  static bool
> -xfs_check_rmap_error_report(
> +ioerr_fsmap_report(
>  	struct scrub_ctx	*ctx,
>  	const char		*descr,
>  	struct fsmap		*map,
> @@ -409,12 +391,31 @@ xfs_check_rmap_error_report(
>  	return true;
>  }
>  
> +static struct bitmap *
> +bitmap_for_disk(
> +	struct scrub_ctx		*ctx,
> +	struct disk			*disk,
> +	struct media_verify_state	*vs)
> +{
> +	dev_t				dev = xfs_disk_to_dev(ctx, disk);
> +
> +	/*
> +	 * If we don't have parent pointers, save the bad extent for
> +	 * later rescanning.
> +	 */

This comment doesn't make sense here, does it?

> +	if (dev == ctx->fsinfo.fs_datadev)
> +		return vs->d_bad;
> +	else if (dev == ctx->fsinfo.fs_rtdev)
> +		return vs->r_bad;
> +	return NULL;
> +}
> +
>  /*
>   * Remember a read error for later, and see if rmap will tell us about the
>   * owner ahead of time.
>   */
>  static void
> -xfs_check_rmap_ioerr(
> +remember_ioerr(
>  	struct scrub_ctx		*ctx,
>  	struct disk			*disk,
>  	uint64_t			start,
> @@ -422,32 +423,39 @@ xfs_check_rmap_ioerr(
>  	int				error,
>  	void				*arg)
>  {
> -	struct fsmap			keys[2];
> -	char				descr[DESCR_BUFSZ];
>  	struct media_verify_state	*vs = arg;
>  	struct bitmap			*tree;
> -	dev_t				dev;
>  	int				ret;
>  
> -	dev = xfs_disk_to_dev(ctx, disk);
> +	tree = bitmap_for_disk(ctx, disk, vs);
> +	if (!tree)
> +		return;
>  
> -	/*
> -	 * If we don't have parent pointers, save the bad extent for
> -	 * later rescanning.
> -	 */
> -	if (dev == ctx->fsinfo.fs_datadev)
> -		tree = vs->d_bad;
> -	else if (dev == ctx->fsinfo.fs_rtdev)
> -		tree = vs->r_bad;
> -	else
> -		tree = NULL;
> -	if (tree) {
> -		ret = bitmap_set(tree, start, length);
> -		if (ret)
> -			str_liberror(ctx, ret, _("setting bad block bitmap"));
> -	}

Maybe that comment should be here?

> +	ret = bitmap_set(tree, start, length);
> +	if (ret)
> +		str_liberror(ctx, ret, _("setting bad block bitmap"));
> +}
> +
> +struct walk_ioerr {
> +	struct scrub_ctx	*ctx;
> +	struct disk		*disk;
> +};

comment here would be great.  Is this walking an ioerror?  Reporting an ioerror
from a walk?  (or maybe also/instead a comment on walk_ioerrs())

also whee, functions and structures w/ the same name :D

> +static int
> +walk_ioerr(
> +	uint64_t		start,
> +	uint64_t		length,
> +	void			*arg)
> +{
> +	struct walk_ioerr	*wioerr = arg;
> +	struct fsmap		keys[2];
> +	char			descr[DESCR_BUFSZ];
> +	dev_t			dev;
> +
> +	dev = xfs_disk_to_dev(wioerr->ctx, wioerr->disk);
>  
> -	snprintf(descr, DESCR_BUFSZ, _("dev %d:%d ioerr @ %"PRIu64":%"PRIu64" "),
> +	snprintf(descr, DESCR_BUFSZ,
> +_("dev %d:%d ioerr @ %"PRIu64":%"PRIu64" "),
>  			major(dev), minor(dev), start, length);
>  
>  	/* Go figure out which blocks are bad from the fsmap. */
> @@ -459,8 +467,60 @@ xfs_check_rmap_ioerr(
>  	(keys + 1)->fmr_owner = ULLONG_MAX;
>  	(keys + 1)->fmr_offset = ULLONG_MAX;
>  	(keys + 1)->fmr_flags = UINT_MAX;
> -	xfs_iterate_fsmap(ctx, descr, keys, xfs_check_rmap_error_report,
> +	xfs_iterate_fsmap(wioerr->ctx, descr, keys, ioerr_fsmap_report,
>  			&start);
> +	return 0;
> +}
> +
> +static int
> +walk_ioerrs(
> +	struct scrub_ctx		*ctx,
> +	struct disk			*disk,
> +	struct media_verify_state	*vs)
> +{
> +	struct walk_ioerr		wioerr = {
> +		.ctx			= ctx,
> +		.disk			= disk,
> +	};> +	struct bitmap			*tree;
> +
> +	if (!disk)
> +		return 0;
> +	tree = bitmap_for_disk(ctx, disk, vs);
> +	if (!tree)
> +		return 0;
> +	return bitmap_iterate(tree, walk_ioerr, &wioerr);
> +}
> +
> +/* Given bad extent lists for the data & rtdev, find bad files. */

maybe "find and report bad files" just to tie it in w/ the "report" in
the function name?

is this only for media errors?  Maybev xfs_report_media_errors?  There are
so many things going on in scrub (and apparently such a limited namespace
for functions :D) that I find myself wishing for a little more context
when I read something in isolation...


> +static bool
> +xfs_report_verify_errors(
> +	struct scrub_ctx		*ctx,
> +	struct media_verify_state	*vs)
> +{
> +	bool				moveon;
> +	int				ret;
> +
> +	ret = walk_ioerrs(ctx, ctx->datadev, vs);
> +	if (ret) {
> +		str_liberror(ctx, ret, _("walking datadev io errors"));
> +		return false;
> +	}
> +
> +	ret = walk_ioerrs(ctx, ctx->rtdev, vs);
> +	if (ret) {
> +		str_liberror(ctx, ret, _("walking rtdev io errors"));
> +		return false;
> +	}
> +
> +	/* Scan the directory tree to get file paths. */
> +	moveon = scan_fs_tree(ctx, xfs_report_verify_dir,
> +			xfs_report_verify_dirent, vs);
> +	if (!moveon)
> +		return false;
> +
> +	/* Scan for unlinked files. */
> +	return xfs_scan_all_inodes(ctx, xfs_report_verify_inode, vs);
>  }
>  
>  /* Schedule a read-verify of a (data block) extent. */
> @@ -571,7 +631,7 @@ xfs_scan_blocks(
>  	}
>  
>  	ret = read_verify_pool_alloc(ctx, ctx->datadev,
> -			ctx->mnt.fsgeom.blocksize, xfs_check_rmap_ioerr,
> +			ctx->mnt.fsgeom.blocksize, remember_ioerr,
>  			scrub_nproc(ctx), &vs.rvp_data);
>  	if (ret) {
>  		str_liberror(ctx, ret, _("creating datadev media verifier"));
> @@ -579,7 +639,7 @@ xfs_scan_blocks(
>  	}
>  	if (ctx->logdev) {
>  		ret = read_verify_pool_alloc(ctx, ctx->logdev,
> -				ctx->mnt.fsgeom.blocksize, xfs_check_rmap_ioerr,
> +				ctx->mnt.fsgeom.blocksize, remember_ioerr,
>  				scrub_nproc(ctx), &vs.rvp_log);
>  		if (ret) {
>  			str_liberror(ctx, ret,
> @@ -589,7 +649,7 @@ xfs_scan_blocks(
>  	}
>  	if (ctx->rtdev) {
>  		ret = read_verify_pool_alloc(ctx, ctx->rtdev,
> -				ctx->mnt.fsgeom.blocksize, xfs_check_rmap_ioerr,
> +				ctx->mnt.fsgeom.blocksize, remember_ioerr,
>  				scrub_nproc(ctx), &vs.rvp_realtime);
>  		if (ret) {
>  			str_liberror(ctx, ret,
> 



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux