On Tue, 16 Jul 2024, Jeff Layton wrote: > On Mon, 2024-07-15 at 17:14 +1000, NeilBrown wrote: > > sp_nrthreads is only ever accessed under the service mutex > > nlmsvc_mutex nfs_callback_mutex nfsd_mutex > > so these is no need for it to be an atomic_t. > > > > The fact that all code using it is single-threaded means that we can > > simplify svc_pool_victim and remove the temporary elevation of > > sp_nrthreads. > > > > Signed-off-by: NeilBrown <neilb@xxxxxxx> > > --- > > fs/nfsd/nfsctl.c | 2 +- > > fs/nfsd/nfssvc.c | 2 +- > > include/linux/sunrpc/svc.h | 4 ++-- > > net/sunrpc/svc.c | 31 +++++++++++-------------------- > > 4 files changed, 15 insertions(+), 24 deletions(-) > > > > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > > index 5b0f2e0d7ccf..d85b6d1fa31f 100644 > > --- a/fs/nfsd/nfsctl.c > > +++ b/fs/nfsd/nfsctl.c > > @@ -1769,7 +1769,7 @@ int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info) > > struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i]; > > > > err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, > > - atomic_read(&sp->sp_nrthreads)); > > + sp->sp_nrthreads); > > if (err) > > goto err_unlock; > > } > > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c > > index 4438cdcd4873..7377422a34df 100644 > > --- a/fs/nfsd/nfssvc.c > > +++ b/fs/nfsd/nfssvc.c > > @@ -641,7 +641,7 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) > > > > if (serv) > > for (i = 0; i < serv->sv_nrpools && i < n; i++) > > - nthreads[i] = atomic_read(&serv->sv_pools[i].sp_nrthreads); > > + nthreads[i] = serv->sv_pools[i].sp_nrthreads; > > return 0; > > } > > > > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h > > index e4fa25fafa97..99e9345d829e 100644 > > --- a/include/linux/sunrpc/svc.h > > +++ b/include/linux/sunrpc/svc.h > > @@ -33,9 +33,9 @@ > > * node traffic on multi-node NUMA NFS servers. 
> > */ > > struct svc_pool { > > - unsigned int sp_id; /* pool id; also node id on NUMA */ > > + unsigned int sp_id; /* pool id; also node id on NUMA */ > > struct lwq sp_xprts; /* pending transports */ > > - atomic_t sp_nrthreads; /* # of threads in pool */ > > + unsigned int sp_nrthreads; /* # of threads in pool */ > > struct list_head sp_all_threads; /* all server threads */ > > struct llist_head sp_idle_threads; /* idle server threads */ > > > > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c > > index 072ad115ae3d..0d8588bc693c 100644 > > --- a/net/sunrpc/svc.c > > +++ b/net/sunrpc/svc.c > > @@ -725,7 +725,7 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) > > serv->sv_nrthreads += 1; > > spin_unlock_bh(&serv->sv_lock); > > > > - atomic_inc(&pool->sp_nrthreads); > > + pool->sp_nrthreads += 1; > > > > /* Protected by whatever lock the service uses when calling > > * svc_set_num_threads() > > @@ -780,31 +780,22 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool, > > struct svc_pool *pool; > > unsigned int i; > > > > -retry: > > pool = target_pool; > > > > - if (pool != NULL) { > > - if (atomic_inc_not_zero(&pool->sp_nrthreads)) > > - goto found_pool; > > - return NULL; > > - } else { > > + if (!pool) { > > for (i = 0; i < serv->sv_nrpools; i++) { > > pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; > > - if (atomic_inc_not_zero(&pool->sp_nrthreads)) > > - goto found_pool; > > + if (pool->sp_nrthreads) > > + break; > > } > > - return NULL; > > } > > > > -found_pool: > > - set_bit(SP_VICTIM_REMAINS, &pool->sp_flags); > > - set_bit(SP_NEED_VICTIM, &pool->sp_flags); > > - if (!atomic_dec_and_test(&pool->sp_nrthreads)) > > + if (pool && pool->sp_nrthreads) { > > + set_bit(SP_VICTIM_REMAINS, &pool->sp_flags); > > + set_bit(SP_NEED_VICTIM, &pool->sp_flags); > > return pool; > > - /* Nothing left in this pool any more */ > > - clear_bit(SP_NEED_VICTIM, &pool->sp_flags); > > - clear_bit(SP_VICTIM_REMAINS, 
&pool->sp_flags); > > - goto retry; > > + } > > + return NULL; > > } > > > > static int > > @@ -883,7 +874,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) > > if (!pool) > > nrservs -= serv->sv_nrthreads; > > else > > - nrservs -= atomic_read(&pool->sp_nrthreads); > > + nrservs -= pool->sp_nrthreads; > > > > if (nrservs > 0) > > return svc_start_kthreads(serv, pool, nrservs); > > @@ -953,7 +944,7 @@ svc_exit_thread(struct svc_rqst *rqstp) > > > > list_del_rcu(&rqstp->rq_all); > > > > - atomic_dec(&pool->sp_nrthreads); > > + pool->sp_nrthreads -= 1; > > > > spin_lock_bh(&serv->sv_lock); > > serv->sv_nrthreads -= 1; > > I don't think svc_exit_thread is called with the nfsd_mutex held, so if > several threads were exiting at the same time, they could race here. This is subtle and deserves explanation in the commit. svc_exit_thread() is called in a thread *after* svc_thread_should_stop() has returned true. That means RQ_VICTIM is set and most likely SP_NEED_VICTIM was set. SP_NEED_VICTIM is set in svc_pool_victim(), which is called from svc_stop_kthreads(), which requires that the mutex is held. svc_stop_kthreads() waits for SP_VICTIM_REMAINS to be cleared, which is the last thing that svc_exit_thread() does. So when svc_exit_thread() is called, the mutex is held by some other thread that is calling svc_set_num_threads(). This is also why the list_del_rcu() in svc_exit_thread() is safe. The case where svc_exit_thread() is called but SP_NEED_VICTIM wasn't set (only RQ_VICTIM) is in the ETIMEDOUT case of nfsd(), in which case nfsd() ensures that the mutex is held. This was why "[PATCH 07/14] Change unshare_fs_struct() to never fail" was needed. If unshare_fs_struct() fails in the current code, svc_exit_thread() can be called without the mutex - which is already a theoretical problem for the list_del_rcu(). Thanks, NeilBrown