From: Darrick J. Wong <djwong@xxxxxxxxxx> Implement the same scaling down of inodegc delays when we're tight on quota. Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> --- fs/xfs/xfs_dquot.h | 10 ++++++ fs/xfs/xfs_icache.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++--- fs/xfs/xfs_trace.h | 34 ++++++++++++++++++++ 3 files changed, 125 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index f642884a6834..6b5e3cf40c8b 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -54,6 +54,16 @@ struct xfs_dquot_res { xfs_qwarncnt_t warnings; }; +static inline bool +xfs_dquot_res_over_limits( + const struct xfs_dquot_res *qres) +{ + if ((qres->softlimit && qres->softlimit < qres->reserved) || + (qres->hardlimit && qres->hardlimit < qres->reserved)) + return true; + return false; +} + /* * The incore dquot structure */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 6418e50518f8..7ba80d7bff41 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -212,6 +212,73 @@ xfs_reclaim_work_queue( rcu_read_unlock(); } +/* + * Scale down the background work delay if we're close to a quota limit. + * Similar to the way that we throttle preallocations, we halve the delay time + * for every low free space threshold that isn't met, and we zero it if we're + * over the hard limit. Return value is in ms. + */ +static inline unsigned int +xfs_gc_delay_dquot( + struct xfs_inode *ip, + xfs_dqtype_t type, + unsigned int tag, + unsigned int delay_ms) +{ + struct xfs_dquot *dqp; + int64_t freesp; + unsigned int shift = 0; + + if (!ip) + goto out; + + /* + * Leave the delay untouched if there are no quota limits to enforce. + * These comparisons are done locklessly because at worst we schedule + * background work sooner than necessary. 
+ */ + dqp = xfs_inode_dquot(ip, type); + if (!dqp || !xfs_dquot_is_enforced(dqp)) + goto out; + + if (xfs_dquot_res_over_limits(&dqp->q_ino) || + xfs_dquot_res_over_limits(&dqp->q_rtb)) { + trace_xfs_gc_delay_dquot(dqp, tag, 32); + return 0; + } + + /* no hi watermark, no throttle */ + if (!dqp->q_prealloc_hi_wmark) + goto out; + + /* under the lo watermark, no throttle */ + if (dqp->q_blk.reserved < dqp->q_prealloc_lo_wmark) + goto out; + + /* Blocks reserved at or above the hi watermark: return zero delay. */ + if (dqp->q_blk.reserved >= dqp->q_prealloc_hi_wmark) { + trace_xfs_gc_delay_dquot(dqp, tag, 32); + return 0; + } + + /* Drop two bits of delay per low space threshold that freesp is under. */ + freesp = dqp->q_prealloc_hi_wmark - dqp->q_blk.reserved; + if (freesp < dqp->q_low_space[XFS_QLOWSP_5_PCNT]) { + shift = 2; + if (freesp < dqp->q_low_space[XFS_QLOWSP_3_PCNT]) + shift += 2; + if (freesp < dqp->q_low_space[XFS_QLOWSP_1_PCNT]) + shift += 2; + } + + if (shift) + trace_xfs_gc_delay_dquot(dqp, tag, shift); + + delay_ms >>= shift; +out: + return delay_ms; +} + /* * Scale down the background work delay if we're low on free space. Similar to * the way that we throttle preallocations, we halve the delay time for every @@ -247,14 +314,17 @@ xfs_gc_delay_freesp( /* * Compute the lag between scheduling and executing some kind of background - * garbage collection work. Return value is in ms. + * garbage collection work. Return value is in ms. If an inode is passed in, + * its dquots will be considered in the lag computation. 
+ */ static inline unsigned int xfs_gc_delay_ms( struct xfs_mount *mp, + struct xfs_inode *ip, unsigned int tag) { unsigned int default_ms; + unsigned int udelay, gdelay, pdelay, fdelay; switch (tag) { case XFS_ICI_INODEGC_TAG: @@ -272,7 +342,12 @@ xfs_gc_delay_ms( return 0; } - return xfs_gc_delay_freesp(mp, tag, default_ms); + udelay = xfs_gc_delay_dquot(ip, XFS_DQTYPE_USER, tag, default_ms); + gdelay = xfs_gc_delay_dquot(ip, XFS_DQTYPE_GROUP, tag, default_ms); + pdelay = xfs_gc_delay_dquot(ip, XFS_DQTYPE_PROJ, tag, default_ms); + fdelay = xfs_gc_delay_freesp(mp, tag, default_ms); + + return min(min(udelay, gdelay), min(pdelay, fdelay)); } /* @@ -308,7 +383,7 @@ xfs_inodegc_queue( if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_INODEGC_TAG)) { unsigned int delay; - delay = xfs_gc_delay_ms(mp, XFS_ICI_INODEGC_TAG); + delay = xfs_gc_delay_ms(mp, ip, XFS_ICI_INODEGC_TAG); trace_xfs_inodegc_queue(mp, delay); queue_delayed_work(mp->m_gc_workqueue, &mp->m_inodegc_work, msecs_to_jiffies(delay)); @@ -323,6 +398,7 @@ xfs_inodegc_queue( static void xfs_gc_requeue_now( struct xfs_mount *mp, + struct xfs_inode *ip, unsigned int tag) { struct delayed_work *dwork; @@ -347,7 +423,7 @@ xfs_gc_requeue_now( if (!radix_tree_tagged(&mp->m_perag_tree, tag)) goto unlock; - if (xfs_gc_delay_ms(mp, tag) == default_ms) + if (xfs_gc_delay_ms(mp, ip, tag) == default_ms) goto unlock; trace_xfs_gc_requeue_now(mp, tag); @@ -378,7 +454,7 @@ xfs_perag_set_inode_tag( pag->pag_ici_needs_inactive++; if (was_tagged) { - xfs_gc_requeue_now(mp, tag); + xfs_gc_requeue_now(mp, ip, tag); return; } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 2092a8542862..001fd202dbfb 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -213,6 +213,40 @@ TRACE_EVENT(xfs_inodegc_requeue_mempressure, __entry->caller_ip) ); +TRACE_EVENT(xfs_gc_delay_dquot, + TP_PROTO(struct xfs_dquot *dqp, unsigned int tag, unsigned int shift), + TP_ARGS(dqp, tag, shift), + TP_STRUCT__entry( + __field(dev_t, dev) + 
__field(u32, id) + __field(xfs_dqtype_t, type) + __field(unsigned int, tag) + __field(unsigned int, shift) + __field(unsigned long long, reserved) + __field(unsigned long long, hi_mark) + __field(unsigned long long, lo_mark) + ), + TP_fast_assign( + __entry->dev = dqp->q_mount->m_super->s_dev; + __entry->id = dqp->q_id; + __entry->type = dqp->q_type; + __entry->reserved = dqp->q_blk.reserved; + __entry->hi_mark = dqp->q_prealloc_hi_wmark; + __entry->lo_mark = dqp->q_prealloc_lo_wmark; + __entry->tag = tag; + __entry->shift = shift; + ), + TP_printk("dev %d:%d tag %u shift %u dqid 0x%x dqtype %s reserved %llu hi %llu lo %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->tag, + __entry->shift, + __entry->id, + __print_flags(__entry->type, "|", XFS_DQTYPE_STRINGS), + __entry->reserved, + __entry->hi_mark, + __entry->lo_mark) +); + TRACE_EVENT(xfs_gc_delay_fdblocks, TP_PROTO(struct xfs_mount *mp, unsigned int tag, unsigned int shift), TP_ARGS(mp, tag, shift),