Re: [PATCH 2/8] xfs: use deferred frees for btree block freeing

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jun 28, 2023 at 08:44:06AM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> Btrees that aren't freespace management trees use the normal extent
> allocation and freeing routines for their blocks. Hence when a btree
> block is freed, a direct call to xfs_free_extent() is made and the
> extent is immediately freed. This puts the entire free space
> management btrees under this path, so we are stacking btrees on
> btrees in the call stack. The inobt, finobt and refcount btrees
> all do this.
> 
> However, the bmap btree does not do this - it calls
> xfs_free_extent_later() to defer the extent free operation via an
> XEFI and hence it gets processed in deferred operation processing
> during the commit of the primary transaction (i.e. via intent
> chaining).
> 
> We need to change xfs_free_extent() to behave in a non-blocking
> manner so that we can avoid deadlocks with busy extents near ENOSPC
> in transactions that free multiple extents. Inserting or removing a
> record from a btree can cause a multi-level tree merge operation and
> that will free multiple blocks from the btree in a single
> transaction. i.e. we can call xfs_free_extent() multiple times, and
> hence the btree manipulation transaction is vulnerable to this busy
> extent deadlock vector.
> 
> To fix this, convert all the remaining callers of xfs_free_extent()
> to use xfs_free_extent_later() to queue XEFIs and hence defer
> processing of the extent frees to a context that can be safely
> restarted if a deadlock condition is detected.
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
>  fs/xfs/libxfs/xfs_ag.c             | 2 +-
>  fs/xfs/libxfs/xfs_alloc.c          | 4 ++++
>  fs/xfs/libxfs/xfs_alloc.h          | 8 +++++---
>  fs/xfs/libxfs/xfs_bmap.c           | 8 +++++---
>  fs/xfs/libxfs/xfs_bmap_btree.c     | 3 ++-
>  fs/xfs/libxfs/xfs_ialloc.c         | 8 ++++----
>  fs/xfs/libxfs/xfs_ialloc_btree.c   | 3 +--
>  fs/xfs/libxfs/xfs_refcount.c       | 9 ++++++---
>  fs/xfs/libxfs/xfs_refcount_btree.c | 8 +-------
>  fs/xfs/xfs_extfree_item.c          | 3 ++-
>  fs/xfs/xfs_reflink.c               | 3 ++-
>  11 files changed, 33 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
> index ee84835ebc66..e9cc481b4ddf 100644
> --- a/fs/xfs/libxfs/xfs_ag.c
> +++ b/fs/xfs/libxfs/xfs_ag.c
> @@ -985,7 +985,7 @@ xfs_ag_shrink_space(
>  			goto resv_err;
>  
>  		err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
> -				true);
> +				XFS_AG_RESV_NONE, true);
>  		if (err2)
>  			goto resv_err;
>  
> diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
> index c20fe99405d8..cc3f7b905ea1 100644
> --- a/fs/xfs/libxfs/xfs_alloc.c
> +++ b/fs/xfs/libxfs/xfs_alloc.c
> @@ -2449,6 +2449,7 @@ xfs_defer_agfl_block(
>  	xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
>  	xefi->xefi_blockcount = 1;
>  	xefi->xefi_owner = oinfo->oi_owner;
> +	xefi->xefi_type = XFS_AG_RESV_AGFL;
>  	if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock)))
>  		return -EFSCORRUPTED;
> @@ -2470,6 +2471,7 @@ __xfs_free_extent_later(
>  	xfs_fsblock_t			bno,
>  	xfs_filblks_t			len,
>  	const struct xfs_owner_info	*oinfo,
> +	enum xfs_ag_resv_type		type,
>  	bool				skip_discard)
>  {
>  	struct xfs_extent_free_item	*xefi;
> @@ -2490,6 +2492,7 @@ __xfs_free_extent_later(
>  	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
>  #endif
>  	ASSERT(xfs_extfree_item_cache != NULL);
> +	ASSERT(type != XFS_AG_RESV_AGFL);
>  
>  	if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
>  		return -EFSCORRUPTED;
> @@ -2498,6 +2501,7 @@ __xfs_free_extent_later(
>  			       GFP_KERNEL | __GFP_NOFAIL);
>  	xefi->xefi_startblock = bno;
>  	xefi->xefi_blockcount = (xfs_extlen_t)len;
> +	xefi->xefi_type = type;
>  	if (skip_discard)
>  		xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
>  	if (oinfo) {
> diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
> index 85ac470be0da..121faf1e11ad 100644
> --- a/fs/xfs/libxfs/xfs_alloc.h
> +++ b/fs/xfs/libxfs/xfs_alloc.h
> @@ -232,7 +232,7 @@ xfs_buf_to_agfl_bno(
>  
>  int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
>  		xfs_filblks_t len, const struct xfs_owner_info *oinfo,
> -		bool skip_discard);
> +		enum xfs_ag_resv_type type, bool skip_discard);
>  
>  /*
>   * List of extents to be free "later".
> @@ -245,6 +245,7 @@ struct xfs_extent_free_item {
>  	xfs_extlen_t		xefi_blockcount;/* number of blocks in extent */
>  	struct xfs_perag	*xefi_pag;
>  	unsigned int		xefi_flags;

/me is barely back from vacation, starting to process the ~1100 emails
by taking care of the obvious bugfixes first...

> +	enum xfs_ag_resv_type	xefi_type;

I got confused by 'xefi_type' until I remembered that
XFS_DEFER_OPS_TYPE_AGFL_FREE / XFS_DEFER_OPS_TYPE_FREE are stuffed in
the xfs_defer_pending structure, not the xefi itself.

Could this field be named xefi_agresv instead?

The rest of the logic in this patch looks correct and makes things
easier for the rt modernization patches, so I'll say

Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx>

and change the name on commit, if that's ok?

--D

>  };
>  
>  void xfs_extent_free_get_group(struct xfs_mount *mp,
> @@ -259,9 +260,10 @@ xfs_free_extent_later(
>  	struct xfs_trans		*tp,
>  	xfs_fsblock_t			bno,
>  	xfs_filblks_t			len,
> -	const struct xfs_owner_info	*oinfo)
> +	const struct xfs_owner_info	*oinfo,
> +	enum xfs_ag_resv_type		type)
>  {
> -	return __xfs_free_extent_later(tp, bno, len, oinfo, false);
> +	return __xfs_free_extent_later(tp, bno, len, oinfo, type, false);
>  }
>  
>  
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index fef35696adb7..30c931b38853 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -574,7 +574,8 @@ xfs_bmap_btree_to_extents(
>  		return error;
>  
>  	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
> -	error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
> +	error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
> +			XFS_AG_RESV_NONE);
>  	if (error)
>  		return error;
>  
> @@ -5236,8 +5237,9 @@ xfs_bmap_del_extent_real(
>  		} else {
>  			error = __xfs_free_extent_later(tp, del->br_startblock,
>  					del->br_blockcount, NULL,
> -					(bflags & XFS_BMAPI_NODISCARD) ||
> -					del->br_state == XFS_EXT_UNWRITTEN);
> +					XFS_AG_RESV_NONE,
> +					((bflags & XFS_BMAPI_NODISCARD) ||
> +					del->br_state == XFS_EXT_UNWRITTEN));
>  			if (error)
>  				goto done;
>  		}
> diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
> index 36564ae3084f..bf3f1b36fdd2 100644
> --- a/fs/xfs/libxfs/xfs_bmap_btree.c
> +++ b/fs/xfs/libxfs/xfs_bmap_btree.c
> @@ -271,7 +271,8 @@ xfs_bmbt_free_block(
>  	int			error;
>  
>  	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
> -	error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo);
> +	error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo,
> +			XFS_AG_RESV_NONE);
>  	if (error)
>  		return error;
>  
> diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
> index 34600f94c2f4..1e5fafbc0cdb 100644
> --- a/fs/xfs/libxfs/xfs_ialloc.c
> +++ b/fs/xfs/libxfs/xfs_ialloc.c
> @@ -1853,8 +1853,8 @@ xfs_difree_inode_chunk(
>  		/* not sparse, calculate extent info directly */
>  		return xfs_free_extent_later(tp,
>  				XFS_AGB_TO_FSB(mp, agno, sagbno),
> -				M_IGEO(mp)->ialloc_blks,
> -				&XFS_RMAP_OINFO_INODES);
> +				M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
> +				XFS_AG_RESV_NONE);
>  	}
>  
>  	/* holemask is only 16-bits (fits in an unsigned long) */
> @@ -1899,8 +1899,8 @@ xfs_difree_inode_chunk(
>  		ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
>  		ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
>  		error = xfs_free_extent_later(tp,
> -				XFS_AGB_TO_FSB(mp, agno, agbno),
> -				contigblk, &XFS_RMAP_OINFO_INODES);
> +				XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
> +				&XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);
>  		if (error)
>  			return error;
>  
> diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
> index 5a945ae21b5d..9258f01c0015 100644
> --- a/fs/xfs/libxfs/xfs_ialloc_btree.c
> +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
> @@ -160,8 +160,7 @@ __xfs_inobt_free_block(
>  
>  	xfs_inobt_mod_blockcount(cur, -1);
>  	fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
> -	return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
> -			XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
> +	return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
>  			&XFS_RMAP_OINFO_INOBT, resv);
>  }
>  
> diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
> index b6e21433925c..70ab113c9cea 100644
> --- a/fs/xfs/libxfs/xfs_refcount.c
> +++ b/fs/xfs/libxfs/xfs_refcount.c
> @@ -1152,7 +1152,8 @@ xfs_refcount_adjust_extents(
>  						cur->bc_ag.pag->pag_agno,
>  						tmp.rc_startblock);
>  				error = xfs_free_extent_later(cur->bc_tp, fsbno,
> -						  tmp.rc_blockcount, NULL);
> +						  tmp.rc_blockcount, NULL,
> +						  XFS_AG_RESV_NONE);
>  				if (error)
>  					goto out_error;
>  			}
> @@ -1213,7 +1214,8 @@ xfs_refcount_adjust_extents(
>  					cur->bc_ag.pag->pag_agno,
>  					ext.rc_startblock);
>  			error = xfs_free_extent_later(cur->bc_tp, fsbno,
> -					ext.rc_blockcount, NULL);
> +					ext.rc_blockcount, NULL,
> +					XFS_AG_RESV_NONE);
>  			if (error)
>  				goto out_error;
>  		}
> @@ -1981,7 +1983,8 @@ xfs_refcount_recover_cow_leftovers(
>  
>  		/* Free the block. */
>  		error = xfs_free_extent_later(tp, fsb,
> -				rr->rr_rrec.rc_blockcount, NULL);
> +				rr->rr_rrec.rc_blockcount, NULL,
> +				XFS_AG_RESV_NONE);
>  		if (error)
>  			goto out_trans;
>  
> diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
> index d4afc5f4e6a5..5c3987d8dc24 100644
> --- a/fs/xfs/libxfs/xfs_refcount_btree.c
> +++ b/fs/xfs/libxfs/xfs_refcount_btree.c
> @@ -106,19 +106,13 @@ xfs_refcountbt_free_block(
>  	struct xfs_buf		*agbp = cur->bc_ag.agbp;
>  	struct xfs_agf		*agf = agbp->b_addr;
>  	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
> -	int			error;
>  
>  	trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,
>  			XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);
>  	be32_add_cpu(&agf->agf_refcount_blocks, -1);
>  	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
> -	error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
> -			XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
> +	return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
>  			&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
> -	if (error)
> -		return error;
> -
> -	return error;
>  }
>  
>  STATIC int
> diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
> index f9e36b810663..79e65bb6b0a2 100644
> --- a/fs/xfs/xfs_extfree_item.c
> +++ b/fs/xfs/xfs_extfree_item.c
> @@ -365,7 +365,7 @@ xfs_trans_free_extent(
>  			agbno, xefi->xefi_blockcount);
>  
>  	error = __xfs_free_extent(tp, xefi->xefi_pag, agbno,
> -			xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE,
> +			xefi->xefi_blockcount, &oinfo, xefi->xefi_type,
>  			xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
>  
>  	/*
> @@ -644,6 +644,7 @@ xfs_efi_item_recover(
>  	for (i = 0; i < efip->efi_format.efi_nextents; i++) {
>  		struct xfs_extent_free_item	fake = {
>  			.xefi_owner		= XFS_RMAP_OWN_UNKNOWN,
> +			.xefi_type		= XFS_AG_RESV_NONE,
>  		};
>  		struct xfs_extent		*extp;
>  
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index abcc559f3c64..eb9102453aff 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -617,7 +617,8 @@ xfs_reflink_cancel_cow_blocks(
>  					del.br_blockcount);
>  
>  			error = xfs_free_extent_later(*tpp, del.br_startblock,
> -					  del.br_blockcount, NULL);
> +					del.br_blockcount, NULL,
> +					XFS_AG_RESV_NONE);
>  			if (error)
>  				break;
>  
> -- 
> 2.40.1
> 



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux