Re: [PATCH 05/14] sunrpc: change sp_nrthreads from atomic_t to unsigned int.

"NeilBrown" <neilb@xxxxxxx> · Tue, 16 Jul 2024 11:33:40 +1000

On Tue, 16 Jul 2024, Jeff Layton wrote:
> On Mon, 2024-07-15 at 17:14 +1000, NeilBrown wrote:
> > sp_nrthreads is only ever accessed under the service mutex
> >   nlmsvc_mutex nfs_callback_mutex nfsd_mutex
> > so these is no need for it to be an atomic_t.
> > 
> > The fact that all code using it is single-threaded means that we can
> > simplify svc_pool_victim and remove the temporary elevation of
> > sp_nrthreads.
> > 
> > Signed-off-by: NeilBrown <neilb@xxxxxxx>
> > ---
> >  fs/nfsd/nfsctl.c           |  2 +-
> >  fs/nfsd/nfssvc.c           |  2 +-
> >  include/linux/sunrpc/svc.h |  4 ++--
> >  net/sunrpc/svc.c           | 31 +++++++++++--------------------
> >  4 files changed, 15 insertions(+), 24 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> > index 5b0f2e0d7ccf..d85b6d1fa31f 100644
> > --- a/fs/nfsd/nfsctl.c
> > +++ b/fs/nfsd/nfsctl.c
> > @@ -1769,7 +1769,7 @@ int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info)
> >  			struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i];
> >  
> >  			err = nla_put_u32(skb, NFSD_A_SERVER_THREADS,
> > -					  atomic_read(&sp->sp_nrthreads));
> > +					  sp->sp_nrthreads);
> >  			if (err)
> >  				goto err_unlock;
> >  		}
> > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> > index 4438cdcd4873..7377422a34df 100644
> > --- a/fs/nfsd/nfssvc.c
> > +++ b/fs/nfsd/nfssvc.c
> > @@ -641,7 +641,7 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
> >  
> >  	if (serv)
> >  		for (i = 0; i < serv->sv_nrpools && i < n; i++)
> > -			nthreads[i] = atomic_read(&serv->sv_pools[i].sp_nrthreads);
> > +			nthreads[i] = serv->sv_pools[i].sp_nrthreads;
> >  	return 0;
> >  }
> >  
> > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> > index e4fa25fafa97..99e9345d829e 100644
> > --- a/include/linux/sunrpc/svc.h
> > +++ b/include/linux/sunrpc/svc.h
> > @@ -33,9 +33,9 @@
> >   * node traffic on multi-node NUMA NFS servers.
> >   */
> >  struct svc_pool {
> > -	unsigned int		sp_id;	    	/* pool id; also node id on NUMA */
> > +	unsigned int		sp_id;		/* pool id; also node id on NUMA */
> >  	struct lwq		sp_xprts;	/* pending transports */
> > -	atomic_t		sp_nrthreads;	/* # of threads in pool */
> > +	unsigned int		sp_nrthreads;	/* # of threads in pool */
> >  	struct list_head	sp_all_threads;	/* all server threads */
> >  	struct llist_head	sp_idle_threads; /* idle server threads */
> >  
> > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> > index 072ad115ae3d..0d8588bc693c 100644
> > --- a/net/sunrpc/svc.c
> > +++ b/net/sunrpc/svc.c
> > @@ -725,7 +725,7 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
> >  	serv->sv_nrthreads += 1;
> >  	spin_unlock_bh(&serv->sv_lock);
> >  
> > -	atomic_inc(&pool->sp_nrthreads);
> > +	pool->sp_nrthreads += 1;
> >  
> >  	/* Protected by whatever lock the service uses when calling
> >  	 * svc_set_num_threads()
> > @@ -780,31 +780,22 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool,
> >  	struct svc_pool *pool;
> >  	unsigned int i;
> >  
> > -retry:
> >  	pool = target_pool;
> >  
> > -	if (pool != NULL) {
> > -		if (atomic_inc_not_zero(&pool->sp_nrthreads))
> > -			goto found_pool;
> > -		return NULL;
> > -	} else {
> > +	if (!pool) {
> >  		for (i = 0; i < serv->sv_nrpools; i++) {
> >  			pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
> > -			if (atomic_inc_not_zero(&pool->sp_nrthreads))
> > -				goto found_pool;
> > +			if (pool->sp_nrthreads)
> > +				break;
> >  		}
> > -		return NULL;
> >  	}
> >  
> > -found_pool:
> > -	set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> > -	set_bit(SP_NEED_VICTIM, &pool->sp_flags);
> > -	if (!atomic_dec_and_test(&pool->sp_nrthreads))
> > +	if (pool && pool->sp_nrthreads) {
> > +		set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> > +		set_bit(SP_NEED_VICTIM, &pool->sp_flags);
> >  		return pool;
> > -	/* Nothing left in this pool any more */
> > -	clear_bit(SP_NEED_VICTIM, &pool->sp_flags);
> > -	clear_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> > -	goto retry;
> > +	}
> > +	return NULL;
> >  }
> >  
> >  static int
> > @@ -883,7 +874,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
> >  	if (!pool)
> >  		nrservs -= serv->sv_nrthreads;
> >  	else
> > -		nrservs -= atomic_read(&pool->sp_nrthreads);
> > +		nrservs -= pool->sp_nrthreads;
> >  
> >  	if (nrservs > 0)
> >  		return svc_start_kthreads(serv, pool, nrservs);
> > @@ -953,7 +944,7 @@ svc_exit_thread(struct svc_rqst *rqstp)
> >  
> >  	list_del_rcu(&rqstp->rq_all);
> >  
> > -	atomic_dec(&pool->sp_nrthreads);
> > +	pool->sp_nrthreads -= 1;
> >  
> >  	spin_lock_bh(&serv->sv_lock);
> >  	serv->sv_nrthreads -= 1;
> 
> I don't think svc_exit_thread is called with the nfsd_mutex held, so if
> several threads were exiting at the same time, they could race here.

This is subtle and deserves explanation in the commit.
svc_exit_thread() is called in a thread *after* svc_thread_should_stop()
has returned true.  That means RQ_VICTIM is set and most likely
SP_NEED_VICTIM was set

SP_NEED_VICTIM is set in svc_pool_victim() which is called from
svc_stop_kthreads() which requires that the mutex is held.
svc_stop_kthreads() waits for SP_VICTIM_REMAINS to be cleared which is
the last thing that svc_exit_thread() does.
So when svc_exit_thread() is called, the mutex is held by some other
thread that is calling svc_set_num_threads().

This is also why the list_del_rcu() in svc_exit_thread() is safe.

The case there svc_exit_thread() is called but SP_NEED_VICTIM wasn't set
(only RQ_VICTIM) is in the ETIMEDOUT case of nfsd(), in which case
nfsd() ensures that the mutex is held.

This was why
 [PATCH 07/14] Change unshare_fs_struct() to never fail.
was needed.  If that fails in the current code, svc_exit_thread() can be
called without the mutex - which is already a theoretical problem for
the list_del_rcu().

Thanks,
NeilBrown