Re: [PATCH v3 06/17] xfs: refactor ratelimited buffer error messages into helper

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Apr 29, 2020 at 01:21:42PM -0400, Brian Foster wrote:
> XFS has some inconsistent log message rate limiting with respect to
> buffer alerts. The metadata I/O error notification uses the generic
> ratelimited alert, the buffer push code uses a custom rate limit and
> the similar quiesce time failure checks are not rate limited at all
> (when they should be).
> 
> The custom rate limit defined in the buf item code is specifically
> crafted for buffer alerts. It is more aggressive than generic rate
> limiting code because it must accommodate a high frequency of I/O
> error events in a relative short timeframe.
> 
> Factor out the custom rate limit state from the buf item code into a
> per-buftarg rate limit so various alerts are limited based on the
> target. Define a buffer alert helper function and use it for the
> buffer alerts that are already ratelimited.
> 
> Signed-off-by: Brian Foster <bfoster@xxxxxxxxxx>

I wonder if there's more that needs to be hooked to the buftarg
ratelimiter, but this seems reasonable enough on its own,

Reviewed-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>

--D

> ---
>  fs/xfs/xfs_buf.c      | 15 +++++++++++----
>  fs/xfs/xfs_buf.h      |  1 +
>  fs/xfs/xfs_buf_item.c | 17 ++++-------------
>  fs/xfs/xfs_message.c  | 22 ++++++++++++++++++++++
>  fs/xfs/xfs_message.h  |  3 +++
>  5 files changed, 41 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index fd76a84cefdd..594d5e1df6f8 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -1244,10 +1244,10 @@ xfs_buf_ioerror_alert(
>  	struct xfs_buf		*bp,
>  	xfs_failaddr_t		func)
>  {
> -	xfs_alert_ratelimited(bp->b_mount,
> -"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
> -			func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
> -			-bp->b_error);
> +	xfs_buf_alert_ratelimited(bp, "XFS: metadata IO error",
> +		"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
> +				  func, (uint64_t)XFS_BUF_ADDR(bp),
> +				  bp->b_length, -bp->b_error);
>  }
>  
>  /*
> @@ -1828,6 +1828,13 @@ xfs_alloc_buftarg(
>  	btp->bt_bdev = bdev;
>  	btp->bt_daxdev = dax_dev;
>  
> +	/*
> +	 * Buffer IO error rate limiting. Limit it to no more than 10 messages
> +	 * per 30 seconds so as to not spam logs too much on repeated errors.
> +	 */
> +	ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
> +			     DEFAULT_RATELIMIT_BURST);
> +
>  	if (xfs_setsize_buftarg_early(btp, bdev))
>  		goto error_free;
>  
> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index 06ea3eef866e..050c53b739e2 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -91,6 +91,7 @@ typedef struct xfs_buftarg {
>  	struct list_lru		bt_lru;
>  
>  	struct percpu_counter	bt_io_count;
> +	struct ratelimit_state	bt_ioerror_rl;
>  } xfs_buftarg_t;
>  
>  struct xfs_buf;
> diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
> index b452a399a441..1f7acffc99ba 100644
> --- a/fs/xfs/xfs_buf_item.c
> +++ b/fs/xfs/xfs_buf_item.c
> @@ -481,14 +481,6 @@ xfs_buf_item_unpin(
>  	}
>  }
>  
> -/*
> - * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30
> - * seconds so as to not spam logs too much on repeated detection of the same
> - * buffer being bad..
> - */
> -
> -static DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
> -
>  STATIC uint
>  xfs_buf_item_push(
>  	struct xfs_log_item	*lip,
> @@ -518,11 +510,10 @@ xfs_buf_item_push(
>  	trace_xfs_buf_item_push(bip);
>  
>  	/* has a previous flush failed due to IO errors? */
> -	if ((bp->b_flags & XBF_WRITE_FAIL) &&
> -	    ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) {
> -		xfs_warn(bp->b_mount,
> -"Failing async write on buffer block 0x%llx. Retrying async write.",
> -			 (long long)bp->b_bn);
> +	if (bp->b_flags & XBF_WRITE_FAIL) {
> +		xfs_buf_alert_ratelimited(bp, "XFS: Failing async write",
> +	    "Failing async write on buffer block 0x%llx. Retrying async write.",
> +					  (long long)bp->b_bn);
>  	}
>  
>  	if (!xfs_buf_delwri_queue(bp, buffer_list))
> diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
> index e0f9d3b6abe9..bc66d95c8d4c 100644
> --- a/fs/xfs/xfs_message.c
> +++ b/fs/xfs/xfs_message.c
> @@ -117,3 +117,25 @@ xfs_hex_dump(const void *p, int length)
>  {
>  	print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1);
>  }
> +
> +void
> +xfs_buf_alert_ratelimited(
> +	struct xfs_buf		*bp,
> +	const char		*rlmsg,
> +	const char		*fmt,
> +	...)
> +{
> +	struct xfs_mount	*mp = bp->b_mount;
> +	struct va_format	vaf;
> +	va_list			args;
> +
> +	/* use the more aggressive per-target rate limit for buffers */
> +	if (!___ratelimit(&bp->b_target->bt_ioerror_rl, rlmsg))
> +		return;
> +
> +	va_start(args, fmt);
> +	vaf.fmt = fmt;
> +	vaf.va = &args;
> +	__xfs_printk(KERN_ALERT, mp, &vaf);
> +	va_end(args);
> +}
> diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
> index 0b05e10995a0..6be2ebe3a7b9 100644
> --- a/fs/xfs/xfs_message.h
> +++ b/fs/xfs/xfs_message.h
> @@ -62,4 +62,7 @@ void asswarn(struct xfs_mount *mp, char *expr, char *f, int l);
>  
>  extern void xfs_hex_dump(const void *p, int length);
>  
> +void xfs_buf_alert_ratelimited(struct xfs_buf *bp, const char *rlmsg,
> +			       const char *fmt, ...);
> +
>  #endif	/* __XFS_MESSAGE_H */
> -- 
> 2.21.1
> 



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux