Re: [PATCH 7/7] xfs_repair: retain superblock buffer to avoid write hook deadlock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> Fix this by retaining a reference to the superblock buffer when possible
> so that the writeback hook doesn't have to access the buffer cache to
> set NEEDSREPAIR.

This is the same one you sent for 5.19 that opened a discussion between
retaining it at specific points, or at 'mount' time, wasn't it? Anyway, I think
this is ok, we can try to do it at mount time in the future.


> 
> Fixes: 3b7667cb ("xfs_repair: set NEEDSREPAIR the first time we write to a filesystem")
> Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx>
> ---
>  libxfs/libxfs_api_defs.h |    2 +
>  libxfs/libxfs_io.h       |    1 +
>  libxfs/rdwr.c            |    8 +++++
>  repair/phase2.c          |    8 +++++
>  repair/protos.h          |    1 +
>  repair/xfs_repair.c      |   75 ++++++++++++++++++++++++++++++++++++++++------
>  6 files changed, 86 insertions(+), 9 deletions(-)
> 
> 
> diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
> index 2716a731bf..f8efcce777 100644
> --- a/libxfs/libxfs_api_defs.h
> +++ b/libxfs/libxfs_api_defs.h
> @@ -53,9 +53,11 @@
>  #define xfs_buf_delwri_submit		libxfs_buf_delwri_submit
>  #define xfs_buf_get			libxfs_buf_get
>  #define xfs_buf_get_uncached		libxfs_buf_get_uncached
> +#define xfs_buf_lock			libxfs_buf_lock
>  #define xfs_buf_read			libxfs_buf_read
>  #define xfs_buf_read_uncached		libxfs_buf_read_uncached
>  #define xfs_buf_relse			libxfs_buf_relse
> +#define xfs_buf_unlock			libxfs_buf_unlock
>  #define xfs_bunmapi			libxfs_bunmapi
>  #define xfs_bwrite			libxfs_bwrite
>  #define xfs_calc_dquots_per_chunk	libxfs_calc_dquots_per_chunk
> diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h
> index 9c0e2704d1..fae8642720 100644
> --- a/libxfs/libxfs_io.h
> +++ b/libxfs/libxfs_io.h
> @@ -226,6 +226,7 @@ xfs_buf_hold(struct xfs_buf *bp)
>  }
> 
>  void xfs_buf_lock(struct xfs_buf *bp);
> +void xfs_buf_unlock(struct xfs_buf *bp);
> 
>  int libxfs_buf_get_uncached(struct xfs_buftarg *targ, size_t bblen, int flags,
>  		struct xfs_buf **bpp);
> diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
> index 20e0793c2f..d5aad3ea21 100644
> --- a/libxfs/rdwr.c
> +++ b/libxfs/rdwr.c
> @@ -384,6 +384,14 @@ xfs_buf_lock(
>  		pthread_mutex_lock(&bp->b_lock);
>  }
> 
> +void
> +xfs_buf_unlock(
> +	struct xfs_buf	*bp)
> +{
> +	if (use_xfs_buf_lock)
> +		pthread_mutex_unlock(&bp->b_lock);
> +}
> +
>  static int
>  __cache_lookup(
>  	struct xfs_bufkey	*key,
> diff --git a/repair/phase2.c b/repair/phase2.c
> index 56a39bb456..2ada95aefd 100644
> --- a/repair/phase2.c
> +++ b/repair/phase2.c
> @@ -370,6 +370,14 @@ phase2(
>  	} else
>  		do_log(_("Phase 2 - using internal log\n"));
> 
> +	/*
> +	 * Now that we've set up the buffer cache the way we want it, try to
> +	 * grab our own reference to the primary sb so that the hooks will not
> +	 * have to call out to the buffer cache.
> +	 */
> +	if (mp->m_buf_writeback_fn)
> +		retain_primary_sb(mp);
> +
>  	/* Zero log if applicable */
>  	do_log(_("        - zero log...\n"));
> 
> diff --git a/repair/protos.h b/repair/protos.h
> index 03ebae1413..83e471ff2a 100644
> --- a/repair/protos.h
> +++ b/repair/protos.h
> @@ -16,6 +16,7 @@ int	get_sb(xfs_sb_t			*sbp,
>  		xfs_off_t			off,
>  		int			size,
>  		xfs_agnumber_t		agno);
> +int retain_primary_sb(struct xfs_mount *mp);
>  void	write_primary_sb(xfs_sb_t	*sbp,
>  			int		size);
> 
> diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> index 871b428d7d..ff29bea974 100644
> --- a/repair/xfs_repair.c
> +++ b/repair/xfs_repair.c
> @@ -749,6 +749,63 @@ check_fs_vs_host_sectsize(
>  	}
>  }
> 
> +/*
> + * If we set up a writeback function to set NEEDSREPAIR while the filesystem is
> + * dirty, there's a chance that calling libxfs_getsb could deadlock the buffer
> + * cache while trying to get the primary sb buffer if the first non-sb write to
> + * the filesystem is the result of a cache shake.  Retain a reference to the
> + * primary sb buffer to avoid all that.
> + */
> +static struct xfs_buf *primary_sb_bp;	/* buffer for superblock */
> +
> +int
> +retain_primary_sb(
> +	struct xfs_mount	*mp)
> +{
> +	int			error;
> +
> +	error = -libxfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR,
> +			XFS_FSS_TO_BB(mp, 1), 0, &primary_sb_bp,
> +			&xfs_sb_buf_ops);
> +	if (error)
> +		return error;
> +
> +	libxfs_buf_unlock(primary_sb_bp);
> +	return 0;
> +}
> +
> +static void
> +drop_primary_sb(void)
> +{
> +	if (!primary_sb_bp)
> +		return;
> +
> +	libxfs_buf_lock(primary_sb_bp);
> +	libxfs_buf_relse(primary_sb_bp);
> +	primary_sb_bp = NULL;
> +}
> +
> +static int
> +get_primary_sb(
> +	struct xfs_mount	*mp,
> +	struct xfs_buf		**bpp)
> +{
> +	int			error;
> +
> +	*bpp = NULL;
> +
> +	if (!primary_sb_bp) {
> +		error = retain_primary_sb(mp);
> +		if (error)
> +			return error;
> +	}
> +
> +	libxfs_buf_lock(primary_sb_bp);
> +	xfs_buf_hold(primary_sb_bp);
> +	*bpp = primary_sb_bp;
> +	return 0;
> +}
> +
>  /* Clear needsrepair after a successful repair run. */
>  static void
>  clear_needsrepair(
> @@ -769,15 +826,14 @@ clear_needsrepair(
>  		do_warn(
>  	_("Cannot clear needsrepair due to flush failure, err=%d.\n"),
>  			error);
> -		return;
> +		goto drop;
>  	}
> 
>  	/* Clear needsrepair from the superblock. */
> -	bp = libxfs_getsb(mp);
> -	if (!bp || bp->b_error) {
> +	error = get_primary_sb(mp, &bp);
> +	if (error) {
>  		do_warn(
> -	_("Cannot clear needsrepair from primary super, err=%d.\n"),
> -			bp ? bp->b_error : ENOMEM);
> +	_("Cannot clear needsrepair from primary super, err=%d.\n"), error);
>  	} else {
>  		mp->m_sb.sb_features_incompat &=
>  				~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> @@ -786,6 +842,8 @@ clear_needsrepair(
>  	}
>  	if (bp)
>  		libxfs_buf_relse(bp);
> +drop:
> +	drop_primary_sb();
>  }
> 
>  static void
> @@ -808,11 +866,10 @@ force_needsrepair(
>  	    xfs_sb_version_needsrepair(&mp->m_sb))
>  		return;
> 
> -	bp = libxfs_getsb(mp);
> -	if (!bp || bp->b_error) {
> +	error = get_primary_sb(mp, &bp);
> +	if (error) {
>  		do_log(
> -	_("couldn't get superblock to set needsrepair, err=%d\n"),
> -				bp ? bp->b_error : ENOMEM);
> +	_("couldn't get superblock to set needsrepair, err=%d\n"), error);
>  	} else {
>  		/*
>  		 * It's possible that we need to set NEEDSREPAIR before we've
> 

-- 
Carlos Maiolino



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux