On Tue, Jul 16, 2024 at 11:33:40AM +1000, NeilBrown wrote:
> On Tue, 16 Jul 2024, Jeff Layton wrote:
> > On Mon, 2024-07-15 at 17:14 +1000, NeilBrown wrote:
> > > sp_nrthreads is only ever accessed under the service mutex
> > >  nlmsvc_mutex nfs_callback_mutex nfsd_mutex
> > > so there is no need for it to be an atomic_t.
> > > 
> > > The fact that all code using it is single-threaded means that we can
> > > simplify svc_pool_victim and remove the temporary elevation of
> > > sp_nrthreads.
> > > 
> > > Signed-off-by: NeilBrown <neilb@xxxxxxx>
> > > ---
> > >  fs/nfsd/nfsctl.c           |  2 +-
> > >  fs/nfsd/nfssvc.c           |  2 +-
> > >  include/linux/sunrpc/svc.h |  4 ++--
> > >  net/sunrpc/svc.c           | 31 +++++++++++--------------------
> > >  4 files changed, 15 insertions(+), 24 deletions(-)
> > > 
> > > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> > > index 5b0f2e0d7ccf..d85b6d1fa31f 100644
> > > --- a/fs/nfsd/nfsctl.c
> > > +++ b/fs/nfsd/nfsctl.c
> > > @@ -1769,7 +1769,7 @@ int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info)
> > >  			struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i];
> > >  
> > >  			err = nla_put_u32(skb, NFSD_A_SERVER_THREADS,
> > > -					  atomic_read(&sp->sp_nrthreads));
> > > +					  sp->sp_nrthreads);
> > >  			if (err)
> > >  				goto err_unlock;
> > >  		}
> > > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> > > index 4438cdcd4873..7377422a34df 100644
> > > --- a/fs/nfsd/nfssvc.c
> > > +++ b/fs/nfsd/nfssvc.c
> > > @@ -641,7 +641,7 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
> > >  
> > >  	if (serv)
> > >  		for (i = 0; i < serv->sv_nrpools && i < n; i++)
> > > -			nthreads[i] = atomic_read(&serv->sv_pools[i].sp_nrthreads);
> > > +			nthreads[i] = serv->sv_pools[i].sp_nrthreads;
> > >  	return 0;
> > >  }
> > >  
> > > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> > > index e4fa25fafa97..99e9345d829e 100644
> > > --- a/include/linux/sunrpc/svc.h
> > > +++ b/include/linux/sunrpc/svc.h
> > > @@ -33,9 +33,9 @@
> > >   * node traffic on multi-node NUMA NFS servers.
> > >   */
> > >  struct svc_pool {
> > > -	unsigned int		sp_id;	    	/* pool id; also node id on NUMA */
> > > +	unsigned int		sp_id;		/* pool id; also node id on NUMA */
> > >  	struct lwq		sp_xprts;	/* pending transports */
> > > -	atomic_t		sp_nrthreads;	/* # of threads in pool */
> > > +	unsigned int		sp_nrthreads;	/* # of threads in pool */
> > >  	struct list_head	sp_all_threads;	/* all server threads */
> > >  	struct llist_head	sp_idle_threads; /* idle server threads */
> > >  
> > > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> > > index 072ad115ae3d..0d8588bc693c 100644
> > > --- a/net/sunrpc/svc.c
> > > +++ b/net/sunrpc/svc.c
> > > @@ -725,7 +725,7 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
> > >  	serv->sv_nrthreads += 1;
> > >  	spin_unlock_bh(&serv->sv_lock);
> > >  
> > > -	atomic_inc(&pool->sp_nrthreads);
> > > +	pool->sp_nrthreads += 1;
> > >  
> > >  	/* Protected by whatever lock the service uses when calling
> > >  	 * svc_set_num_threads()
> > > @@ -780,31 +780,22 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool,
> > >  	struct svc_pool *pool;
> > >  	unsigned int i;
> > >  
> > > -retry:
> > >  	pool = target_pool;
> > >  
> > > -	if (pool != NULL) {
> > > -		if (atomic_inc_not_zero(&pool->sp_nrthreads))
> > > -			goto found_pool;
> > > -		return NULL;
> > > -	} else {
> > > +	if (!pool) {
> > >  		for (i = 0; i < serv->sv_nrpools; i++) {
> > >  			pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
> > > -			if (atomic_inc_not_zero(&pool->sp_nrthreads))
> > > -				goto found_pool;
> > > +			if (pool->sp_nrthreads)
> > > +				break;
> > >  		}
> > > -		return NULL;
> > >  	}
> > >  
> > > -found_pool:
> > > -	set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> > > -	set_bit(SP_NEED_VICTIM, &pool->sp_flags);
> > > -	if (!atomic_dec_and_test(&pool->sp_nrthreads))
> > > +	if (pool && pool->sp_nrthreads) {
> > > +		set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> > > +		set_bit(SP_NEED_VICTIM, &pool->sp_flags);
> > >  		return pool;
> > > -	/* Nothing left in this pool any more */
> > > -	clear_bit(SP_NEED_VICTIM, &pool->sp_flags);
> > > -	clear_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> > > -	goto retry;
> > > +	}
> > > +	return NULL;
> > >  }
> > >  
> > >  static int
> > > @@ -883,7 +874,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
> > >  	if (!pool)
> > >  		nrservs -= serv->sv_nrthreads;
> > >  	else
> > > -		nrservs -= atomic_read(&pool->sp_nrthreads);
> > > +		nrservs -= pool->sp_nrthreads;
> > >  
> > >  	if (nrservs > 0)
> > >  		return svc_start_kthreads(serv, pool, nrservs);
> > > @@ -953,7 +944,7 @@ svc_exit_thread(struct svc_rqst *rqstp)
> > >  
> > >  	list_del_rcu(&rqstp->rq_all);
> > >  
> > > -	atomic_dec(&pool->sp_nrthreads);
> > > +	pool->sp_nrthreads -= 1;
> > >  
> > >  	spin_lock_bh(&serv->sv_lock);
> > >  	serv->sv_nrthreads -= 1;
> > 
> > I don't think svc_exit_thread is called with the nfsd_mutex held, so if
> > several threads were exiting at the same time, they could race here.
> > This is subtle and deserves explanation in the commit.

Hi Neil, assuming you mean "commit message" here, are you planning to
resend 5/14 with this update?

> svc_exit_thread() is called in a thread *after* svc_thread_should_stop()
> has returned true.  That means RQ_VICTIM is set and most likely
> SP_NEED_VICTIM was set.
> 
> SP_NEED_VICTIM is set in svc_pool_victim() which is called from
> svc_stop_kthreads() which requires that the mutex is held.
> svc_stop_kthreads() waits for SP_VICTIM_REMAINS to be cleared, which is
> the last thing that svc_exit_thread() does.
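
In other words (a rough userspace model, not the kernel code -- the pool
layout and the stop_one_thread()/exiting_thread() helpers below are made
up for illustration): the stopper sets the flags and waits with the mutex
held, and the exiting thread only touches the plain nrthreads counter
inside that mutex-held window, so the counter never needs to be atomic.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t service_mutex = PTHREAD_MUTEX_INITIALIZER; /* stands in for nfsd_mutex */

static struct {
	unsigned int nrthreads;        /* models sp_nrthreads: plain, not atomic */
	atomic_bool need_victim;       /* models SP_NEED_VICTIM */
	atomic_bool victim_remains;    /* models SP_VICTIM_REMAINS */
} pool = { .nrthreads = 1 };

/* Models the exiting service thread: it only touches nrthreads after
 * need_victim was set by a task holding service_mutex, and clears
 * victim_remains as its very last step. */
static void *exiting_thread(void *arg)
{
	(void)arg;
	while (!atomic_load(&pool.need_victim))    /* svc_thread_should_stop() */
		;                                  /* spin stands in for the idle wait */
	pool.nrthreads -= 1;                       /* plain update: stopper still holds the mutex */
	atomic_store(&pool.victim_remains, false); /* last act: lets the stopper continue */
	return NULL;
}

/* Models svc_stop_kthreads(): with service_mutex held, mark a victim,
 * then wait for it to finish before the mutex is released. */
static void stop_one_thread(void)
{
	pthread_mutex_lock(&service_mutex);
	atomic_store(&pool.victim_remains, true);  /* SP_VICTIM_REMAINS */
	atomic_store(&pool.need_victim, true);     /* SP_NEED_VICTIM */
	while (atomic_load(&pool.victim_remains))  /* wait for the victim to finish exiting */
		;
	/* The victim's nrthreads update happened entirely inside this
	 * mutex-held window, so no other mutex holder can race with it. */
	pthread_mutex_unlock(&service_mutex);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, exiting_thread, NULL);
	stop_one_thread();
	pthread_join(t, NULL);
	printf("nrthreads is now %u\n", pool.nrthreads);  /* prints 0 */
	return 0;
}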
> So when svc_exit_thread() is called, the mutex is held by some other
> thread that is calling svc_set_num_threads().
> 
> This is also why the list_del_rcu() in svc_exit_thread() is safe.
> 
> The case where svc_exit_thread() is called but SP_NEED_VICTIM wasn't set
> (only RQ_VICTIM) is the ETIMEDOUT case of nfsd(), in which case nfsd()
> ensures that the mutex is held.
> 
> This was why
>    [PATCH 07/14] Change unshare_fs_struct() to never fail.
> was needed.  If that fails in the current code, svc_exit_thread() can be
> called without the mutex - which is already a theoretical problem for
> the list_del_rcu().
> 
> Thanks,
> NeilBrown

-- 
Chuck Lever