From: Chuck Lever <chuck.lever@xxxxxxxxxx> [ Upstream commit a9507f6af1450ed26a4a36d979af518f5bb21e5d ] Enable nfsd_prune_bucket() to drop the bucket lock while calling kfree(). Use the same pattern that Jeff recently introduced in the NFSD filecache. A few percpu operations are moved outside the lock since they temporarily disable local IRQs which is expensive and does not need to be done while the lock is held. Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx> Stable-dep-of: c135e1269f34 ("NFSD: Refactor the duplicate reply cache shrinker") Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- fs/nfsd/nfscache.c | 78 ++++++++++++++++++++++++++++++++++++++++++----------- fs/nfsd/trace.h | 22 ++++++++++++++ 2 files changed, 85 insertions(+), 15 deletions(-) --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -117,6 +117,21 @@ static void nfsd_cacherep_free(struct sv kmem_cache_free(drc_slab, rp); } +static unsigned long +nfsd_cacherep_dispose(struct list_head *dispose) +{ + struct svc_cacherep *rp; + unsigned long freed = 0; + + while (!list_empty(dispose)) { + rp = list_first_entry(dispose, struct svc_cacherep, c_lru); + list_del(&rp->c_lru); + nfsd_cacherep_free(rp); + freed++; + } + return freed; +} + static void nfsd_cacherep_unlink_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b, struct svc_cacherep *rp) @@ -259,6 +274,41 @@ nfsd_cache_bucket_find(__be32 xid, struc return &nn->drc_hashtbl[hash]; } +/* + * Remove and return no more than @max expired entries in bucket @b. + * If @max is zero, do not limit the number of removed entries. + */ +static void +nfsd_prune_bucket_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b, + unsigned int max, struct list_head *dispose) +{ + unsigned long expiry = jiffies - RC_EXPIRE; + struct svc_cacherep *rp, *tmp; + unsigned int freed = 0; + + lockdep_assert_held(&b->cache_lock); + + /* The bucket LRU is ordered oldest-first. */ + list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) { + /* + * Don't free entries attached to calls that are still + * in-progress, but do keep scanning the list. + */ + if (rp->c_state == RC_INPROG) + continue; + + if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries && + time_before(expiry, rp->c_timestamp)) + break; + + nfsd_cacherep_unlink_locked(nn, b, rp); + list_add(&rp->c_lru, dispose); + + if (max && ++freed > max) + break; + } +} + static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn, unsigned int max) { @@ -282,11 +332,6 @@ static long prune_bucket(struct nfsd_drc return freed; } -static long nfsd_prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) -{ - return prune_bucket(b, nn, 3); -} - /* * Walk the LRU list and prune off entries that are older than RC_EXPIRE. * Also prune the oldest ones when the total exceeds the max number of entries. @@ -442,6 +487,8 @@ int nfsd_cache_lookup(struct svc_rqst *r __wsum csum; struct nfsd_drc_bucket *b; int type = rqstp->rq_cachetype; + unsigned long freed; + LIST_HEAD(dispose); int rtn = RC_DOIT; rqstp->rq_cacherep = NULL; @@ -466,20 +513,18 @@ int nfsd_cache_lookup(struct svc_rqst *r found = nfsd_cache_insert(b, rp, nn); if (found != rp) goto found_entry; - - nfsd_stats_rc_misses_inc(); rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; + nfsd_prune_bucket_locked(nn, b, 3, &dispose); + spin_unlock(&b->cache_lock); + freed = nfsd_cacherep_dispose(&dispose); + trace_nfsd_drc_gc(nn, freed); + + nfsd_stats_rc_misses_inc(); atomic_inc(&nn->num_drc_entries); nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp)); - - nfsd_prune_bucket(b, nn); - -out_unlock: - spin_unlock(&b->cache_lock); -out: - return rtn; + goto out; found_entry: /* We found a matching entry which is either in progress or done. */ @@ -517,7 +562,10 @@ found_entry: out_trace: trace_nfsd_drc_found(nn, rqstp, rtn); - goto out_unlock; +out_unlock: + spin_unlock(&b->cache_lock); +out: + return rtn; } /** --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -1260,6 +1260,28 @@ TRACE_EVENT(nfsd_drc_mismatch, __entry->ingress) ); +TRACE_EVENT_CONDITION(nfsd_drc_gc, + TP_PROTO( + const struct nfsd_net *nn, + unsigned long freed + ), + TP_ARGS(nn, freed), + TP_CONDITION(freed > 0), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + __field(unsigned long, freed) + __field(int, total) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + __entry->freed = freed; + __entry->total = atomic_read(&nn->num_drc_entries); + ), + TP_printk("boot_time=%16llx total=%d freed=%lu", + __entry->boot_time, __entry->total, __entry->freed + ) +); + TRACE_EVENT(nfsd_cb_args, TP_PROTO( const struct nfs4_client *clp, Patches currently in stable-queue which might be from cel@xxxxxxxxxx are queue-5.15/nfsd-rename-nfsd_net_-to-nfsd_stats_.patch queue-5.15/nfsd-rename-nfsd_reply_cache_alloc.patch queue-5.15/sunrpc-pass-in-the-sv_stats-struct-through-svc_create_pooled.patch queue-5.15/nfsd-move-init-of-percpu-reply_cache_stats-counters-back-to-nfsd_init_net.patch queue-5.15/sunrpc-use-the-struct-net-as-the-svc-proc-private.patch queue-5.15/nfsd-rewrite-synopsis-of-nfsd_percpu_counters_init.patch queue-5.15/nfsd-replace-nfsd_prune_bucket.patch queue-5.15/nfsd-refactor-the-duplicate-reply-cache-shrinker.patch queue-5.15/sunrpc-remove-pg_stats-from-svc_program.patch queue-5.15/nfsd-make-all-of-the-nfsd-stats-per-network-namespace.patch queue-5.15/nfsd-fix-frame-size-warning-in-svc_export_parse.patch queue-5.15/nfsd-expose-proc-net-sunrpc-nfsd-in-net-namespaces.patch queue-5.15/nfsd-move-reply-cache-initialization-into-nfsd-startup.patch queue-5.15/nfsd-remove-nfsd_stats-make-th_cnt-a-global-counter.patch queue-5.15/nfsd-make-svc_stat-per-network-namespace-instead-of-global.patch queue-5.15/sunrpc-don-t-change-sv_stats-if-it-doesn-t-exist.patch queue-5.15/nfsd-refactor-nfsd_reply_cache_free_locked.patch queue-5.15/nfsd-stop-setting-pg_stats-for-unused-stats.patch