From: Dave Chinner <dchinner@xxxxxxxxxx> [ Upstream commit b742d7b4f0e03df25c2a772adcded35044b625ca ] [ 6.1: resolved conflict in xfs_extfree_item.c ] Btrees that aren't freespace management trees use the normal extent allocation and freeing routines for their blocks. Hence when a btree block is freed, a direct call to xfs_free_extent() is made and the extent is immediately freed. This puts the entire free space management btrees under this path, so we are stacking btrees on btrees in the call stack. The inobt, finobt and refcount btrees all do this. However, the bmap btree does not do this - it calls xfs_free_extent_later() to defer the extent free operation via an XEFI and hence it gets processed in deferred operation processing during the commit of the primary transaction (i.e. via intent chaining). We need to change xfs_free_extent() to behave in a non-blocking manner so that we can avoid deadlocks with busy extents near ENOSPC in transactions that free multiple extents. Inserting or removing a record from a btree can cause a multi-level tree merge operation and that will free multiple blocks from the btree in a single transaction. i.e. we can call xfs_free_extent() multiple times, and hence the btree manipulation transaction is vulnerable to this busy extent deadlock vector. To fix this, convert all the remaining callers of xfs_free_extent() to use xfs_free_extent_later() to queue XEFIs and hence defer processing of the extent frees to a context that can be safely restarted if a deadlock condition is detected. Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx> Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> Reviewed-by: Chandan Babu R <chandan.babu@xxxxxxxxxx> Signed-off-by: Leah Rumancik <leah.rumancik@xxxxxxxxx> Acked-by: "Darrick J. Wong" <djwong@xxxxxxxxxx> --- fs/xfs/libxfs/xfs_ag.c | 2 +- fs/xfs/libxfs/xfs_alloc.c | 4 ++++ fs/xfs/libxfs/xfs_alloc.h | 8 +++++--- fs/xfs/libxfs/xfs_bmap.c | 8 +++++--- fs/xfs/libxfs/xfs_bmap_btree.c | 3 ++- fs/xfs/libxfs/xfs_ialloc.c | 8 ++++---- fs/xfs/libxfs/xfs_ialloc_btree.c | 3 +-- fs/xfs/libxfs/xfs_refcount.c | 9 ++++++--- fs/xfs/libxfs/xfs_refcount_btree.c | 8 +------- fs/xfs/xfs_extfree_item.c | 3 ++- fs/xfs/xfs_reflink.c | 3 ++- 11 files changed, 33 insertions(+), 26 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index ee0d56854b83..e03bfeacbed4 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -905,11 +905,11 @@ xfs_ag_shrink_space( be32_add_cpu(&agf->agf_length, delta); if (err2 != -ENOSPC) goto resv_err; err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, - true); + XFS_AG_RESV_NONE, true); if (err2) goto resv_err; /* * Roll the transaction before trying to re-init the per-ag diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index af447605051b..e44f3f5c6d27 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2505,55 +2505,59 @@ xfs_defer_agfl_block( xefi = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); xefi->xefi_startblock = fsbno; xefi->xefi_blockcount = 1; xefi->xefi_owner = oinfo->oi_owner; + xefi->xefi_agresv = XFS_AG_RESV_AGFL; trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list); return 0; } /* * Add the extent to the list of extents to be free at transaction end. * The list is maintained sorted (by block number). */ int __xfs_free_extent_later( struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, + enum xfs_ag_resv_type type, bool skip_discard) { struct xfs_extent_free_item *xefi; #ifdef DEBUG struct xfs_mount *mp = tp->t_mountp; xfs_agnumber_t agno; xfs_agblock_t agbno; ASSERT(bno != NULLFSBLOCK); ASSERT(len > 0); ASSERT(len <= XFS_MAX_BMBT_EXTLEN); ASSERT(!isnullstartblock(bno)); agno = XFS_FSB_TO_AGNO(mp, bno); agbno = XFS_FSB_TO_AGBNO(mp, bno); ASSERT(agno < mp->m_sb.sb_agcount); ASSERT(agbno < mp->m_sb.sb_agblocks); ASSERT(len < mp->m_sb.sb_agblocks); ASSERT(agbno + len <= mp->m_sb.sb_agblocks); #endif ASSERT(xfs_extfree_item_cache != NULL); + ASSERT(type != XFS_AG_RESV_AGFL); if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) return -EFSCORRUPTED; xefi = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); xefi->xefi_startblock = bno; xefi->xefi_blockcount = (xfs_extlen_t)len; + xefi->xefi_agresv = type; if (skip_discard) xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD; if (oinfo) { ASSERT(oinfo->oi_offset == 0); diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index cfc9dcdc1753..bbedb18651de 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -213,36 +213,38 @@ xfs_buf_to_agfl_bno( return bp->b_addr; } int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, - bool skip_discard); + enum xfs_ag_resv_type type, bool skip_discard); /* * List of extents to be free "later". * The list is kept sorted on xbf_startblock. */ struct xfs_extent_free_item { struct list_head xefi_list; uint64_t xefi_owner; xfs_fsblock_t xefi_startblock;/* starting fs block number */ xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ unsigned int xefi_flags; + enum xfs_ag_resv_type xefi_agresv; }; #define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */ #define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */ #define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */ static inline int xfs_free_extent_later( struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, - const struct xfs_owner_info *oinfo) + const struct xfs_owner_info *oinfo, + enum xfs_ag_resv_type type) { - return __xfs_free_extent_later(tp, bno, len, oinfo, false); + return __xfs_free_extent_later(tp, bno, len, oinfo, type, false); } extern struct kmem_cache *xfs_extfree_item_cache; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 29781a124323..aac071e4d000 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -572,11 +572,12 @@ xfs_bmap_btree_to_extents( cblock = XFS_BUF_TO_BLOCK(cbp); if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) return error; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); - error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); + error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo, + XFS_AG_RESV_NONE); if (error) return error; ip->i_nblocks--; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); @@ -5206,12 +5207,13 @@ xfs_bmap_del_extent_real( if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { xfs_refcount_decrease_extent(tp, del); } else { error = __xfs_free_extent_later(tp, del->br_startblock, del->br_blockcount, NULL, - (bflags & XFS_BMAPI_NODISCARD) || - del->br_state == XFS_EXT_UNWRITTEN); + XFS_AG_RESV_NONE, + ((bflags & XFS_BMAPI_NODISCARD) || + del->br_state == XFS_EXT_UNWRITTEN)); if (error) goto done; } } diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index bac2a6496a8e..57f401f2492d 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -286,11 +286,12 @@ xfs_bmbt_free_block( xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); struct xfs_owner_info oinfo; int error; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); - error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); + error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo, + XFS_AG_RESV_NONE); if (error) return error; ip->i_nblocks--; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 448ea76d50f8..d1472cbd48ff 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1844,12 +1844,12 @@ xfs_difree_inode_chunk( if (!xfs_inobt_issparse(rec->ir_holemask)) { /* not sparse, calculate extent info directly */ return xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), - M_IGEO(mp)->ialloc_blks, - &XFS_RMAP_OINFO_INODES); + M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES, + XFS_AG_RESV_NONE); } /* holemask is only 16-bits (fits in an unsigned long) */ ASSERT(sizeof(rec->ir_holemask) <= sizeof(holemask[0])); holemask[0] = rec->ir_holemask; @@ -1890,12 +1890,12 @@ xfs_difree_inode_chunk( mp->m_sb.sb_inopblock; ASSERT(agbno % mp->m_sb.sb_spino_align == 0); ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); error = xfs_free_extent_later(tp, - XFS_AGB_TO_FSB(mp, agno, agbno), - contigblk, &XFS_RMAP_OINFO_INODES); + XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, + &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE); if (error) return error; /* reset range to current bit and carry on... */ startidx = endidx = nextbit; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 2dbe553d87fb..7125447cde1a 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -158,12 +158,11 @@ __xfs_inobt_free_block( { xfs_fsblock_t fsbno; xfs_inobt_mod_blockcount(cur, -1); fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); - return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, - XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, + return xfs_free_extent_later(cur->bc_tp, fsbno, 1, &XFS_RMAP_OINFO_INOBT, resv); } STATIC int xfs_inobt_free_block( diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index fec1ad95988c..4ec7a81dd3ef 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1128,11 +1128,12 @@ xfs_refcount_adjust_extents( } else { fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, tmp.rc_startblock); error = xfs_free_extent_later(cur->bc_tp, fsbno, - tmp.rc_blockcount, NULL); + tmp.rc_blockcount, NULL, + XFS_AG_RESV_NONE); if (error) goto out_error; } (*agbno) += tmp.rc_blockcount; @@ -1189,11 +1190,12 @@ xfs_refcount_adjust_extents( } else { fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, ext.rc_startblock); error = xfs_free_extent_later(cur->bc_tp, fsbno, - ext.rc_blockcount, NULL); + ext.rc_blockcount, NULL, + XFS_AG_RESV_NONE); if (error) goto out_error; } skip: @@ -1961,11 +1963,12 @@ xfs_refcount_recover_cow_leftovers( xfs_refcount_free_cow_extent(tp, fsb, rr->rr_rrec.rc_blockcount); /* Free the block. */ error = xfs_free_extent_later(tp, fsb, - rr->rr_rrec.rc_blockcount, NULL); + rr->rr_rrec.rc_blockcount, NULL, + XFS_AG_RESV_NONE); if (error) goto out_trans; error = xfs_trans_commit(tp); if (error) diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 3d8e62da2ccc..fbd53b6951a9 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -104,23 +104,17 @@ xfs_refcountbt_free_block( { struct xfs_mount *mp = cur->bc_mp; struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); - int error; trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno, XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); be32_add_cpu(&agf->agf_refcount_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); - error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, - XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, + return xfs_free_extent_later(cur->bc_tp, fsbno, 1, &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA); - if (error) - return error; - - return error; } STATIC int xfs_refcountbt_get_minrecs( struct xfs_btree_cur *cur, diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index c1aae07467c9..b670863d8467 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -367,11 +367,11 @@ xfs_trans_free_extent( trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, xefi->xefi_blockcount); pag = xfs_perag_get(mp, agno); error = __xfs_free_extent(tp, pag, agbno, xefi->xefi_blockcount, - &oinfo, XFS_AG_RESV_NONE, + &oinfo, xefi->xefi_agresv, xefi->xefi_flags & XFS_EFI_SKIP_DISCARD); xfs_perag_put(pag); /* * Mark the transaction dirty, even on error. This ensures the @@ -626,10 +626,11 @@ xfs_efi_item_recover( efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); for (i = 0; i < efip->efi_format.efi_nextents; i++) { struct xfs_extent_free_item fake = { .xefi_owner = XFS_RMAP_OWN_UNKNOWN, + .xefi_agresv = XFS_AG_RESV_NONE, }; struct xfs_extent *extp; extp = &efip->efi_format.efi_extents[i]; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index ee187e90d943..1bac6a8af970 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -617,11 +617,12 @@ xfs_reflink_cancel_cow_blocks( /* Free the CoW orphan record. */ xfs_refcount_free_cow_extent(*tpp, del.br_startblock, del.br_blockcount); error = xfs_free_extent_later(*tpp, del.br_startblock, - del.br_blockcount, NULL); + del.br_blockcount, NULL, + XFS_AG_RESV_NONE); if (error) break; /* Roll the transaction */ error = xfs_defer_finish(tpp); -- 2.49.0.rc1.451.g8f38331e32-goog