From: Darrick J. Wong <djwong@xxxxxxxxxx> If we're doing an fstrim by block number, progress is made in linear order across the AG by increasing block number. The fact that our scan cursor increases monotonically makes it trivial to relax the AGF lock to prevent other threads from blocking in the kernel for long periods of time. Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> --- fs/xfs/xfs_discard.c | 36 +++++++++++++++++++++++++++++++----- fs/xfs/xfs_trace.h | 1 + 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 9cddfa005105..ec3f470537fd 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -20,11 +20,17 @@ #include "xfs_ag.h" #include "xfs_health.h" +/* + * For trim functions that support it, cycle the metadata locks periodically + * to prevent other parts of the filesystem from starving. + */ +#define XFS_TRIM_RELAX_INTERVAL (HZ) + /* Trim the free space in this AG by block number. */ static inline int xfs_trim_ag_bybno( struct xfs_perag *pag, - struct xfs_buf *agbp, + struct xfs_buf **agbpp, xfs_daddr_t start, xfs_daddr_t end, xfs_daddr_t minlen, @@ -33,12 +39,13 @@ xfs_trim_ag_bybno( struct xfs_mount *mp = pag->pag_mount; struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp); struct xfs_btree_cur *cur; - struct xfs_agf *agf = agbp->b_addr; + struct xfs_agf *agf = (*agbpp)->b_addr; xfs_daddr_t end_daddr; xfs_agnumber_t agno = pag->pag_agno; xfs_agblock_t start_agbno; xfs_agblock_t end_agbno; xfs_extlen_t minlen_fsb = XFS_BB_TO_FSB(mp, minlen); + unsigned long last_relax = jiffies; int i; int error; @@ -49,7 +56,7 @@ xfs_trim_ag_bybno( end = min(end, end_daddr - 1); end_agbno = xfs_daddr_to_agbno(mp, end); - cur = xfs_allocbt_init_cursor(mp, NULL, agbp, pag, XFS_BTNUM_BNO); + cur = xfs_allocbt_init_cursor(mp, NULL, *agbpp, pag, XFS_BTNUM_BNO); error = xfs_alloc_lookup_le(cur, start_agbno, 0, &i); if (error) @@ -119,8 +126,27 @@ xfs_trim_ag_bybno( goto out_del_cursor; *blocks_trimmed += flen; + if (time_after(jiffies, last_relax + XFS_TRIM_RELAX_INTERVAL)) { + /* + * Cycle the AGF lock since we know how to pick up + * where we left off. + */ + trace_xfs_discard_relax(mp, agno, fbno, flen); + xfs_btree_del_cursor(cur, error); + xfs_buf_relse(*agbpp); + + error = xfs_alloc_read_agf(pag, NULL, 0, agbpp); + if (error) + return error; + + cur = xfs_allocbt_init_cursor(mp, NULL, *agbpp, pag, + XFS_BTNUM_BNO); + error = xfs_alloc_lookup_ge(cur, fbno + flen, 0, &i); + last_relax = jiffies; + } else { next_extent: - error = xfs_btree_increment(cur, 0, &i); + error = xfs_btree_increment(cur, 0, &i); + } if (error) goto out_del_cursor; @@ -258,7 +284,7 @@ xfs_trim_ag_extents( end < XFS_AGB_TO_DADDR(mp, pag->pag_agno, be32_to_cpu(agf->agf_length)) - 1) { /* Only trimming part of this AG */ - error = xfs_trim_ag_bybno(pag, agbp, start, end, minlen, + error = xfs_trim_ag_bybno(pag, &agbp, start, end, minlen, blocks_trimmed); } else { /* Trim this entire AG */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 26d6e9694c2e..e3a22c3c61a3 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2487,6 +2487,7 @@ DEFINE_DISCARD_EVENT(xfs_discard_extent); DEFINE_DISCARD_EVENT(xfs_discard_toosmall); DEFINE_DISCARD_EVENT(xfs_discard_exclude); DEFINE_DISCARD_EVENT(xfs_discard_busy); +DEFINE_DISCARD_EVENT(xfs_discard_relax); /* btree cursor events */ TRACE_DEFINE_ENUM(XFS_BTNUM_BNOi);