Re: [PATCH 7/7] xfs_repair: retain superblock buffer to avoid write hook deadlock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Nov 18, 2022 at 03:45:03PM +0100, Carlos Maiolino wrote:
> > Fix this by retaining a reference to the superblock buffer when possible
> > so that the writeback hook doesn't have to access the buffer cache to
> > set NEEDSREPAIR.
> 
> This is the same one you sent for 5.19 that opened a discussion between
> retaining it at specific points, or at 'mount' time, wasn't it? Anyway, I think
> this is ok, we can try to do it at mount time in the future.

Correct, it hasn't changed since then.

--D

> 
> > 
> > Fixes: 3b7667cb ("xfs_repair: set NEEDSREPAIR the first time we write to a filesystem")
> > Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx>
> > ---
> >  libxfs/libxfs_api_defs.h |    2 +
> >  libxfs/libxfs_io.h       |    1 +
> >  libxfs/rdwr.c            |    8 +++++
> >  repair/phase2.c          |    8 +++++
> >  repair/protos.h          |    1 +
> >  repair/xfs_repair.c      |   75 ++++++++++++++++++++++++++++++++++++++++------
> >  6 files changed, 86 insertions(+), 9 deletions(-)
> > 
> > 
> > diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
> > index 2716a731bf..f8efcce777 100644
> > --- a/libxfs/libxfs_api_defs.h
> > +++ b/libxfs/libxfs_api_defs.h
> > @@ -53,9 +53,11 @@
> >  #define xfs_buf_delwri_submit		libxfs_buf_delwri_submit
> >  #define xfs_buf_get			libxfs_buf_get
> >  #define xfs_buf_get_uncached		libxfs_buf_get_uncached
> > +#define xfs_buf_lock			libxfs_buf_lock
> >  #define xfs_buf_read			libxfs_buf_read
> >  #define xfs_buf_read_uncached		libxfs_buf_read_uncached
> >  #define xfs_buf_relse			libxfs_buf_relse
> > +#define xfs_buf_unlock			libxfs_buf_unlock
> >  #define xfs_bunmapi			libxfs_bunmapi
> >  #define xfs_bwrite			libxfs_bwrite
> >  #define xfs_calc_dquots_per_chunk	libxfs_calc_dquots_per_chunk
> > diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h
> > index 9c0e2704d1..fae8642720 100644
> > --- a/libxfs/libxfs_io.h
> > +++ b/libxfs/libxfs_io.h
> > @@ -226,6 +226,7 @@ xfs_buf_hold(struct xfs_buf *bp)
> >  }
> > 
> >  void xfs_buf_lock(struct xfs_buf *bp);
> > +void xfs_buf_unlock(struct xfs_buf *bp);
> > 
> >  int libxfs_buf_get_uncached(struct xfs_buftarg *targ, size_t bblen, int flags,
> >  		struct xfs_buf **bpp);
> > diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
> > index 20e0793c2f..d5aad3ea21 100644
> > --- a/libxfs/rdwr.c
> > +++ b/libxfs/rdwr.c
> > @@ -384,6 +384,14 @@ xfs_buf_lock(
> >  		pthread_mutex_lock(&bp->b_lock);
> >  }
> > 
> > +void
> > +xfs_buf_unlock(
> > +	struct xfs_buf	*bp)
> > +{
> > +	if (use_xfs_buf_lock)
> > +		pthread_mutex_unlock(&bp->b_lock);
> > +}
> > +
> >  static int
> >  __cache_lookup(
> >  	struct xfs_bufkey	*key,
> > diff --git a/repair/phase2.c b/repair/phase2.c
> > index 56a39bb456..2ada95aefd 100644
> > --- a/repair/phase2.c
> > +++ b/repair/phase2.c
> > @@ -370,6 +370,14 @@ phase2(
> >  	} else
> >  		do_log(_("Phase 2 - using internal log\n"));
> > 
> > +	/*
> > +	 * Now that we've set up the buffer cache the way we want it, try to
> > +	 * grab our own reference to the primary sb so that the hooks will not
> > +	 * have to call out to the buffer cache.
> > +	 */
> > +	if (mp->m_buf_writeback_fn)
> > +		retain_primary_sb(mp);
> > +
> >  	/* Zero log if applicable */
> >  	do_log(_("        - zero log...\n"));
> > 
> > diff --git a/repair/protos.h b/repair/protos.h
> > index 03ebae1413..83e471ff2a 100644
> > --- a/repair/protos.h
> > +++ b/repair/protos.h
> > @@ -16,6 +16,7 @@ int	get_sb(xfs_sb_t			*sbp,
> >  		xfs_off_t			off,
> >  		int			size,
> >  		xfs_agnumber_t		agno);
> > +int retain_primary_sb(struct xfs_mount *mp);
> >  void	write_primary_sb(xfs_sb_t	*sbp,
> >  			int		size);
> > 
> > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > index 871b428d7d..ff29bea974 100644
> > --- a/repair/xfs_repair.c
> > +++ b/repair/xfs_repair.c
> > @@ -749,6 +749,63 @@ check_fs_vs_host_sectsize(
> >  	}
> >  }
> > 
> > +/*
> > + * If we set up a writeback function to set NEEDSREPAIR while the filesystem is
> > + * dirty, there's a chance that calling libxfs_getsb could deadlock the buffer
> > + * cache while trying to get the primary sb buffer if the first non-sb write to
> > + * the filesystem is the result of a cache shake.  Retain a reference to the
> > + * primary sb buffer to avoid all that.
> > + */
> > +static struct xfs_buf *primary_sb_bp;	/* buffer for superblock */
> > +
> > +int
> > +retain_primary_sb(
> > +	struct xfs_mount	*mp)
> > +{
> > +	int			error;
> > +
> > +	error = -libxfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR,
> > +			XFS_FSS_TO_BB(mp, 1), 0, &primary_sb_bp,
> > +			&xfs_sb_buf_ops);
> > +	if (error)
> > +		return error;
> > +
> > +	libxfs_buf_unlock(primary_sb_bp);
> > +	return 0;
> > +}
> > +
> > +static void
> > +drop_primary_sb(void)
> > +{
> > +	if (!primary_sb_bp)
> > +		return;
> > +
> > +	libxfs_buf_lock(primary_sb_bp);
> > +	libxfs_buf_relse(primary_sb_bp);
> > +	primary_sb_bp = NULL;
> > +}
> > +
> > +static int
> > +get_primary_sb(
> > +	struct xfs_mount	*mp,
> > +	struct xfs_buf		**bpp)
> > +{
> > +	int			error;
> > +
> > +	*bpp = NULL;
> > +
> > +	if (!primary_sb_bp) {
> > +		error = retain_primary_sb(mp);
> > +		if (error)
> > +			return error;
> > +	}
> > +
> > +	libxfs_buf_lock(primary_sb_bp);
> > +	xfs_buf_hold(primary_sb_bp);
> > +	*bpp = primary_sb_bp;
> > +	return 0;
> > +}
> > +
> >  /* Clear needsrepair after a successful repair run. */
> >  static void
> >  clear_needsrepair(
> > @@ -769,15 +826,14 @@ clear_needsrepair(
> >  		do_warn(
> >  	_("Cannot clear needsrepair due to flush failure, err=%d.\n"),
> >  			error);
> > -		return;
> > +		goto drop;
> >  	}
> > 
> >  	/* Clear needsrepair from the superblock. */
> > -	bp = libxfs_getsb(mp);
> > -	if (!bp || bp->b_error) {
> > +	error = get_primary_sb(mp, &bp);
> > +	if (error) {
> >  		do_warn(
> > -	_("Cannot clear needsrepair from primary super, err=%d.\n"),
> > -			bp ? bp->b_error : ENOMEM);
> > +	_("Cannot clear needsrepair from primary super, err=%d.\n"), error);
> >  	} else {
> >  		mp->m_sb.sb_features_incompat &=
> >  				~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> > @@ -786,6 +842,8 @@ clear_needsrepair(
> >  	}
> >  	if (bp)
> >  		libxfs_buf_relse(bp);
> > +drop:
> > +	drop_primary_sb();
> >  }
> > 
> >  static void
> > @@ -808,11 +866,10 @@ force_needsrepair(
> >  	    xfs_sb_version_needsrepair(&mp->m_sb))
> >  		return;
> > 
> > -	bp = libxfs_getsb(mp);
> > -	if (!bp || bp->b_error) {
> > +	error = get_primary_sb(mp, &bp);
> > +	if (error) {
> >  		do_log(
> > -	_("couldn't get superblock to set needsrepair, err=%d\n"),
> > -				bp ? bp->b_error : ENOMEM);
> > +	_("couldn't get superblock to set needsrepair, err=%d\n"), error);
> >  	} else {
> >  		/*
> >  		 * It's possible that we need to set NEEDSREPAIR before we've
> > 
> 
> -- 
> Carlos Maiolino



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux