Thanks for looking at this, it's long overdue! On Mon, Dec 28, 2020 at 07:03:43PM +0200, Amir Goldstein wrote: > nfsd stats counters can be updated by concurrent nfsd threads without any > protection. > > Convert some nfsd_stats and nfsd_net struct members to use percpu counters. > > There are several members of struct nfsd_stats that are reported in file > /proc/net/rpc/nfsd by never updated. Those have been left untouched. Looking through the history, the code that updated fh_lookup, for example, was removed in 2002. I'd be OK with removing those entirely, maybe just leave a /* deprecated field */ comment where we printk the hard-coded 0's. If somebody wants to know more they can still find the answers in git. > The longest_chain* members of struct nfsd_net remain unprotected. > > Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> > --- > fs/nfsd/netns.h | 20 +++++++---- > fs/nfsd/nfs4proc.c | 2 +- > fs/nfsd/nfscache.c | 52 +++++++++++++++++++-------- > fs/nfsd/nfsctl.c | 5 ++- > fs/nfsd/nfsfh.c | 2 +- > fs/nfsd/stats.c | 87 ++++++++++++++++++++++++++++++++++++---------- > fs/nfsd/stats.h | 42 +++++++++++++++------- > fs/nfsd/vfs.c | 4 +-- > 8 files changed, 156 insertions(+), 58 deletions(-) > > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h > index 7346acda9d76..080c5389b2e7 100644 > --- a/fs/nfsd/netns.h > +++ b/fs/nfsd/netns.h > @@ -10,6 +10,7 @@ > > #include <net/net_namespace.h> > #include <net/netns/generic.h> > +#include <linux/percpu_counter.h> > > /* Hash tables for nfs4_clientid state */ > #define CLIENT_HASH_BITS 4 > @@ -149,20 +150,25 @@ struct nfsd_net { > > /* > * Stats and other tracking of on the duplicate reply cache. > - * These fields and the "rc" fields in nfsdstats are modified > - * with only the per-bucket cache lock, which isn't really safe > - * and should be fixed if we want the statistics to be > - * completely accurate. > + * The longest_chain* fields are modified with only the per-bucket > + * cache lock, which isn't really safe and should be fixed if we want > + * these statistics to be completely accurate. > */ > > /* total number of entries */ > atomic_t num_drc_entries; > > + /* Reference to below counters as array for init/destroy */ > + struct percpu_counter counters[0]; This feels slightly too clever for its own good, but.... OK, I see there's a bunch of initializations to do in the nfsdstats case, and you don't want to open code all that (and its error handling). I guess I don't have a better idea. Is this a common pattern elsewhere? Anyway, otherwise it all looks routine to me, thanks again. --b. > /* cache misses due only to checksum comparison failures */ > - unsigned int payload_misses; > - > + struct percpu_counter payload_misses; > /* amount of memory (in bytes) currently consumed by the DRC */ > - unsigned int drc_mem_usage; > + struct percpu_counter drc_mem_usage; > + /* End of counters array */ > + struct percpu_counter counters_end[0]; > +#define NFSD_NET_COUNTERS_NUM \ > + ((offsetof(struct nfsd_net, counters_end) - \ > + offsetof(struct nfsd_net, counters)) / sizeof(struct percpu_counter)) > > /* longest hash chain seen */ > unsigned int longest_chain; > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index e83b21778816..0fa205d8ce49 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -2173,7 +2173,7 @@ nfsd4_proc_null(struct svc_rqst *rqstp) > static inline void nfsd4_increment_op_stats(u32 opnum) > { > if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP) > - nfsdstats.nfs4_opcount[opnum]++; > + percpu_counter_inc(&nfsdstats.nfs4_opcount[opnum]); > } > > static const struct nfsd4_operation nfsd4_ops[]; > diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c > index 80c90fc231a5..4093ab25cc4d 100644 > --- a/fs/nfsd/nfscache.c > +++ b/fs/nfsd/nfscache.c > @@ -121,14 +121,14 @@ nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, > struct nfsd_net *nn) > { > if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { > - nn->drc_mem_usage -= rp->c_replvec.iov_len; > + percpu_counter_sub(&nn->drc_mem_usage, rp->c_replvec.iov_len); > kfree(rp->c_replvec.iov_base); > } > if (rp->c_state != RC_UNUSED) { > rb_erase(&rp->c_node, &b->rb_head); > list_del(&rp->c_lru); > atomic_dec(&nn->num_drc_entries); > - nn->drc_mem_usage -= sizeof(*rp); > + percpu_counter_sub(&nn->drc_mem_usage, sizeof(*rp)); > } > kmem_cache_free(drc_slab, rp); > } > @@ -154,6 +154,16 @@ void nfsd_drc_slab_free(void) > kmem_cache_destroy(drc_slab); > } > > +static int nfsd_reply_cache_stats_init(struct nfsd_net *nn) > +{ > + return nfsd_percpu_counters_init(nn->counters, NFSD_NET_COUNTERS_NUM); > +} > + > +static void nfsd_reply_cache_stats_destroy(struct nfsd_net *nn) > +{ > + nfsd_percpu_counters_destroy(nn->counters, NFSD_NET_COUNTERS_NUM); > +} > + > int nfsd_reply_cache_init(struct nfsd_net *nn) > { > unsigned int hashsize; > @@ -165,12 +175,16 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) > hashsize = nfsd_hashsize(nn->max_drc_entries); > nn->maskbits = ilog2(hashsize); > > + status = nfsd_reply_cache_stats_init(nn); > + if (status) > + goto out_nomem; > + > nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan; > nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count; > nn->nfsd_reply_cache_shrinker.seeks = 1; > status = register_shrinker(&nn->nfsd_reply_cache_shrinker); > if (status) > - goto out_nomem; > + goto out_stats_destroy; > > nn->drc_hashtbl = kvzalloc(array_size(hashsize, > sizeof(*nn->drc_hashtbl)), GFP_KERNEL); > @@ -186,6 +200,8 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) > return 0; > out_shrinker: > unregister_shrinker(&nn->nfsd_reply_cache_shrinker); > +out_stats_destroy: > + nfsd_reply_cache_stats_destroy(nn); > out_nomem: > printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); > return -ENOMEM; > @@ -196,6 +212,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) > struct svc_cacherep *rp; > unsigned int i; > > + nfsd_reply_cache_stats_destroy(nn); > unregister_shrinker(&nn->nfsd_reply_cache_shrinker); > > for (i = 0; i < nn->drc_hashsize; i++) { > @@ -324,7 +341,7 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key, > { > if (key->c_key.k_xid == rp->c_key.k_xid && > key->c_key.k_csum != rp->c_key.k_csum) { > - ++nn->payload_misses; > + percpu_counter_inc(&nn->payload_misses); > trace_nfsd_drc_mismatch(nn, key, rp); > } > > @@ -407,7 +424,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) > > rqstp->rq_cacherep = NULL; > if (type == RC_NOCACHE) { > - nfsdstats.rcnocache++; > + percpu_counter_inc(&nfsdstats.rcnocache); > goto out; > } > > @@ -429,12 +446,12 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) > goto found_entry; > } > > - nfsdstats.rcmisses++; > + percpu_counter_inc(&nfsdstats.rcmisses); > rqstp->rq_cacherep = rp; > rp->c_state = RC_INPROG; > > atomic_inc(&nn->num_drc_entries); > - nn->drc_mem_usage += sizeof(*rp); > + percpu_counter_add(&nn->drc_mem_usage, sizeof(*rp)); > > /* go ahead and prune the cache */ > prune_bucket(b, nn); > @@ -446,7 +463,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) > > found_entry: > /* We found a matching entry which is either in progress or done. */ > - nfsdstats.rchits++; > + percpu_counter_inc(&nfsdstats.rchits); > rtn = RC_DROPIT; > > /* Request being processed */ > @@ -548,7 +565,7 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) > return; > } > spin_lock(&b->cache_lock); > - nn->drc_mem_usage += bufsize; > + percpu_counter_add(&nn->drc_mem_usage, bufsize); > lru_put_end(b, rp); > rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags); > rp->c_type = cachetype; > @@ -588,13 +605,18 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) > > seq_printf(m, "max entries: %u\n", nn->max_drc_entries); > seq_printf(m, "num entries: %u\n", > - atomic_read(&nn->num_drc_entries)); > + atomic_read(&nn->num_drc_entries)); > seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits); > - seq_printf(m, "mem usage: %u\n", nn->drc_mem_usage); > - seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); > - seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses); > - seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache); > - seq_printf(m, "payload misses: %u\n", nn->payload_misses); > + seq_printf(m, "mem usage: %lld\n", > + percpu_counter_sum_positive(&nn->drc_mem_usage)); > + seq_printf(m, "cache hits: %lld\n", > + percpu_counter_sum_positive(&nfsdstats.rchits)); > + seq_printf(m, "cache misses: %lld\n", > + percpu_counter_sum_positive(&nfsdstats.rcmisses)); > + seq_printf(m, "not cached: %lld\n", > + percpu_counter_sum_positive(&nfsdstats.rcnocache)); > + seq_printf(m, "payload misses: %lld\n", > + percpu_counter_sum_positive(&nn->payload_misses)); > seq_printf(m, "longest chain len: %u\n", nn->longest_chain); > seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize); > return 0; > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > index f6d5d783f4a4..258605ee49b8 100644 > --- a/fs/nfsd/nfsctl.c > +++ b/fs/nfsd/nfsctl.c > @@ -1534,7 +1534,9 @@ static int __init init_nfsd(void) > retval = nfsd4_init_pnfs(); > if (retval) > goto out_free_slabs; > - nfsd_stat_init(); /* Statistics */ > + retval = nfsd_stat_init(); /* Statistics */ > + if (retval) > + goto out_free_pnfs; > retval = nfsd_drc_slab_create(); > if (retval) > goto out_free_stat; > @@ -1554,6 +1556,7 @@ static int __init init_nfsd(void) > nfsd_drc_slab_free(); > out_free_stat: > nfsd_stat_shutdown(); > +out_free_pnfs: > nfsd4_exit_pnfs(); > out_free_slabs: > nfsd4_free_slabs(); > diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c > index c81dbbad8792..1879758bbaa5 100644 > --- a/fs/nfsd/nfsfh.c > +++ b/fs/nfsd/nfsfh.c > @@ -400,7 +400,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) > } > out: > if (error == nfserr_stale) > - nfsdstats.fh_stale++; > + percpu_counter_inc(&nfsdstats.fh_stale); > return error; > } > > diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c > index b1bc582b0493..7bef1e7139d7 100644 > --- a/fs/nfsd/stats.c > +++ b/fs/nfsd/stats.c > @@ -38,17 +38,17 @@ static int nfsd_proc_show(struct seq_file *seq, void *v) > { > int i; > > - seq_printf(seq, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n", > - nfsdstats.rchits, > - nfsdstats.rcmisses, > - nfsdstats.rcnocache, > - nfsdstats.fh_stale, > - nfsdstats.fh_lookup, > - nfsdstats.fh_anon, > - nfsdstats.fh_nocache_dir, > - nfsdstats.fh_nocache_nondir, > - nfsdstats.io_read, > - nfsdstats.io_write); > + seq_printf(seq, "rc %lld %lld %lld\nfh %lld %u %u %u %u\nio %lld %lld\n", > + percpu_counter_sum_positive(&nfsdstats.rchits), > + percpu_counter_sum_positive(&nfsdstats.rcmisses), > + percpu_counter_sum_positive(&nfsdstats.rcnocache), > + percpu_counter_sum_positive(&nfsdstats.fh_stale), > + nfsdstats.fh_lookup, > + nfsdstats.fh_anon, > + nfsdstats.fh_nocache_dir, > + nfsdstats.fh_nocache_nondir, > + percpu_counter_sum_positive(&nfsdstats.io_read), > + percpu_counter_sum_positive(&nfsdstats.io_write)); > /* thread usage: */ > seq_printf(seq, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt); > for (i=0; i<10; i++) { > @@ -62,7 +62,7 @@ static int nfsd_proc_show(struct seq_file *seq, void *v) > for (i=0; i<11; i++) > seq_printf(seq, " %u", nfsdstats.ra_depth[i]); > seq_putc(seq, '\n'); > - > + > /* show my rpc info */ > svc_seq_show(seq, &nfsd_svcstats); > > @@ -70,8 +70,10 @@ static int nfsd_proc_show(struct seq_file *seq, void *v) > /* Show count for individual nfsv4 operations */ > /* Writing operation numbers 0 1 2 also for maintaining uniformity */ > seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1); > - for (i = 0; i <= LAST_NFS4_OP; i++) > - seq_printf(seq, " %u", nfsdstats.nfs4_opcount[i]); > + for (i = 0; i <= LAST_NFS4_OP; i++) { > + seq_printf(seq, " %lld", > + percpu_counter_sum_positive(&nfsdstats.nfs4_opcount[i])); > + } > > seq_putc(seq, '\n'); > #endif > @@ -91,14 +93,63 @@ static const struct proc_ops nfsd_proc_ops = { > .proc_release = single_release, > }; > > -void > -nfsd_stat_init(void) > +int nfsd_percpu_counters_init(struct percpu_counter counters[], int num) > { > + int i, err = 0; > + > + for (i = 0; !err && i < num; i++) > + err = percpu_counter_init(&counters[i], 0, GFP_KERNEL); > + > + if (!err) > + return 0; > + > + for (; i > 0; i--) > + percpu_counter_destroy(&counters[i-1]); > + > + return err; > +} > + > +void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num) > +{ > + int i; > + > + for (i = 0; i < num; i++) > + percpu_counter_set(&counters[i], 0); > +} > + > +void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num) > +{ > + int i; > + > + for (i = 0; i < num; i++) > + percpu_counter_destroy(&counters[i]); > +} > + > +static int nfsd_stat_counters_init(void) > +{ > + return nfsd_percpu_counters_init(nfsdstats.counters, NFSD_STATS_COUNTERS_NUM); > +} > + > +static void nfsd_stat_counters_destroy(void) > +{ > + nfsd_percpu_counters_destroy(nfsdstats.counters, NFSD_STATS_COUNTERS_NUM); > +} > + > +int nfsd_stat_init(void) > +{ > + int err; > + > + err = nfsd_stat_counters_init(); > + if (err) > + return err; > + > svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops); > + > + return 0; > } > > -void > -nfsd_stat_shutdown(void) > +void nfsd_stat_shutdown(void) > { > + nfsd_stat_counters_destroy(); > svc_proc_unregister(&init_net, "nfsd"); > } > diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h > index b23fdac69820..ad52a916375e 100644 > --- a/fs/nfsd/stats.h > +++ b/fs/nfsd/stats.h > @@ -8,37 +8,53 @@ > #define _NFSD_STATS_H > > #include <uapi/linux/nfsd/stats.h> > +#include <linux/percpu_counter.h> > > > struct nfsd_stats { > - unsigned int rchits; /* repcache hits */ > - unsigned int rcmisses; /* repcache hits */ > - unsigned int rcnocache; /* uncached reqs */ > - unsigned int fh_stale; /* FH stale error */ > + /* Reference to below counters as array for init/destroy */ > + struct percpu_counter counters[0]; > + struct percpu_counter rchits; /* repcache hits */ > + struct percpu_counter rcmisses; /* repcache hits */ > + struct percpu_counter rcnocache; /* uncached reqs */ > + struct percpu_counter fh_stale; /* FH stale error */ > + struct percpu_counter io_read; /* bytes returned to read requests */ > + struct percpu_counter io_write; /* bytes passed in write requests */ > +#ifdef CONFIG_NFSD_V4 > + /* Counters of individual nfsv4 operations */ > + struct percpu_counter nfs4_opcount[LAST_NFS4_OP + 1]; > +#endif > + /* End of array of couters */ > + struct percpu_counter counters_end[0]; > +#define NFSD_STATS_COUNTERS_NUM \ > + ((offsetof(struct nfsd_stats, counters_end) - \ > + offsetof(struct nfsd_stats, counters)) / sizeof(struct percpu_counter)) > + > + /* Protected by nfsd_mutex */ > + unsigned int th_cnt; /* number of available threads */ > + > + /* Not updated at all?? */ > unsigned int fh_lookup; /* dentry cached */ > unsigned int fh_anon; /* anon file dentry returned */ > unsigned int fh_nocache_dir; /* filehandle not found in dcache */ > unsigned int fh_nocache_nondir; /* filehandle not found in dcache */ > - unsigned int io_read; /* bytes returned to read requests */ > - unsigned int io_write; /* bytes passed in write requests */ > - unsigned int th_cnt; /* number of available threads */ > unsigned int th_usage[10]; /* number of ticks during which n perdeciles > * of available threads were in use */ > unsigned int th_fullcnt; /* number of times last free thread was used */ > unsigned int ra_size; /* size of ra cache */ > unsigned int ra_depth[11]; /* number of times ra entry was found that deep > * in the cache (10percentiles). [10] = not found */ > -#ifdef CONFIG_NFSD_V4 > - unsigned int nfs4_opcount[LAST_NFS4_OP + 1]; /* count of individual nfsv4 operations */ > -#endif > - > }; > > > extern struct nfsd_stats nfsdstats; > + > extern struct svc_stat nfsd_svcstats; > > -void nfsd_stat_init(void); > -void nfsd_stat_shutdown(void); > +int nfsd_percpu_counters_init(struct percpu_counter counters[], int num); > +void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num); > +void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num); > +int nfsd_stat_init(void); > +void nfsd_stat_shutdown(void); > > #endif /* _NFSD_STATS_H */ > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c > index 1ecaceebee13..6adb7aba2575 100644 > --- a/fs/nfsd/vfs.c > +++ b/fs/nfsd/vfs.c > @@ -889,7 +889,7 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp, > unsigned long *count, u32 *eof, ssize_t host_err) > { > if (host_err >= 0) { > - nfsdstats.io_read += host_err; > + percpu_counter_add(&nfsdstats.io_read, host_err); > *eof = nfsd_eof_on_read(file, offset, host_err, *count); > *count = host_err; > fsnotify_access(file); > @@ -1031,7 +1031,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, > goto out_nfserr; > } > *cnt = host_err; > - nfsdstats.io_write += *cnt; > + percpu_counter_add(&nfsdstats.io_write, *cnt); > fsnotify_modify(file); > > if (stable && use_wgather) { > -- > 2.17.1