Re: [PATCH 09/14] SUNRPC: change how svc threads are asked to exit.

On Fri, 21 Jul 2023, Jeff Layton wrote:
> On Tue, 2023-07-18 at 16:38 +1000, NeilBrown wrote:
> > svc threads are currently stopped using kthread_stop().  This requires
> > identifying a specific thread.  However we don't care which thread
> > stops, just as long as one does.
> > 
> > So instead, set a flag in the svc_pool to say that a thread needs to
> > die, and have each thread check this flag instead of calling
> > kthread_should_stop().  The first to find it clears the flag and moves
> > towards shutting down.
> > 
> > This removes an explicit dependency on sp_all_threads which will make a
> > future patch simpler.
> > 
> > Signed-off-by: NeilBrown <neilb@xxxxxxx>
> > ---
> >  fs/lockd/svc.c                |    4 ++--
> >  fs/lockd/svclock.c            |    4 ++--
> >  fs/nfs/callback.c             |    2 +-
> >  fs/nfsd/nfs4proc.c            |    8 +++++---
> >  fs/nfsd/nfssvc.c              |    2 +-
> >  include/linux/lockd/lockd.h   |    2 +-
> >  include/linux/sunrpc/svc.h    |   22 +++++++++++++++++++++-
> >  include/trace/events/sunrpc.h |    5 +++--
> >  net/sunrpc/svc.c              |   39 +++++++++++++++++----------------------
> >  net/sunrpc/svc_xprt.c         |    8 +++-----
> >  10 files changed, 56 insertions(+), 40 deletions(-)
> > 
> > diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
> > index 4f55cd42c2e6..30543edd2309 100644
> > --- a/fs/lockd/svc.c
> > +++ b/fs/lockd/svc.c
> > @@ -134,12 +134,12 @@ lockd(void *vrqstp)
> >  	 * The main request loop. We don't terminate until the last
> >  	 * NFS mount or NFS daemon has gone away.
> >  	 */
> > -	while (!kthread_should_stop()) {
> > +	while (!svc_thread_should_stop(rqstp)) {
> >  
> >  		/* update sv_maxconn if it has changed */
> >  		rqstp->rq_server->sv_maxconn = nlm_max_connections;
> >  
> > -		nlmsvc_retry_blocked();
> > +		nlmsvc_retry_blocked(rqstp);
> >  
> >  		/*
> >  		 * Find any work to do, such as a socket with data available,
> > diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
> > index 3d7bd5c04b36..fd399c9bea5c 100644
> > --- a/fs/lockd/svclock.c
> > +++ b/fs/lockd/svclock.c
> > @@ -1009,13 +1009,13 @@ retry_deferred_block(struct nlm_block *block)
> >   * be retransmitted.
> >   */
> >  void
> > -nlmsvc_retry_blocked(void)
> > +nlmsvc_retry_blocked(struct svc_rqst *rqstp)
> >  {
> >  	unsigned long	timeout = MAX_SCHEDULE_TIMEOUT;
> >  	struct nlm_block *block;
> >  
> >  	spin_lock(&nlm_blocked_lock);
> > -	while (!list_empty(&nlm_blocked) && !kthread_should_stop()) {
> > +	while (!list_empty(&nlm_blocked) && !svc_thread_should_stop(rqstp)) {
> >  		block = list_entry(nlm_blocked.next, struct nlm_block, b_list);
> >  
> >  		if (block->b_when == NLM_NEVER)
> > diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
> > index 660cec36c45c..c58ec2e66aaf 100644
> > --- a/fs/nfs/callback.c
> > +++ b/fs/nfs/callback.c
> > @@ -78,7 +78,7 @@ nfs4_callback_svc(void *vrqstp)
> >  
> >  	set_freezable();
> >  
> > -	while (!kthread_should_stop()) {
> > +	while (!svc_thread_should_stop(rqstp)) {
> >  		/*
> >  		 * Listen for a request on the socket
> >  		 */
> > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > index 157488290676..66024ed06181 100644
> > --- a/fs/nfsd/nfs4proc.c
> > +++ b/fs/nfsd/nfs4proc.c
> > @@ -1306,7 +1306,8 @@ extern void nfs_sb_deactive(struct super_block *sb);
> >   * setup a work entry in the ssc delayed unmount list.
> >   */
> >  static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
> > -				  struct nfsd4_ssc_umount_item **nsui)
> > +				  struct nfsd4_ssc_umount_item **nsui,
> > +				  struct svc_rqst *rqstp)
> >  {
> >  	struct nfsd4_ssc_umount_item *ni = NULL;
> >  	struct nfsd4_ssc_umount_item *work = NULL;
> > @@ -1329,7 +1330,7 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
> >  			spin_unlock(&nn->nfsd_ssc_lock);
> >  
> >  			/* allow 20secs for mount/unmount for now - revisit */
> > -			if (kthread_should_stop() ||
> > +			if (svc_thread_should_stop(rqstp) ||
> >  					(schedule_timeout(20*HZ) == 0)) {
> >  				finish_wait(&nn->nfsd_ssc_waitq, &wait);
> >  				kfree(work);
> > @@ -1445,7 +1446,7 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
> >  		goto out_free_rawdata;
> >  	snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
> >  
> > -	status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui);
> > +	status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui, rqstp);
> >  	if (status)
> >  		goto out_free_devname;
> >  	if ((*nsui)->nsui_vfsmount)
> > @@ -1620,6 +1621,7 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy,
> >  	if (bytes_total == 0)
> >  		bytes_total = ULLONG_MAX;
> >  	do {
> > +		/* Only async copies can be stopped here */
> >  		if (kthread_should_stop())
> >  			break;
> >  		bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos,
> > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> > index b536b254c59e..9b282c89ea87 100644
> > --- a/fs/nfsd/nfssvc.c
> > +++ b/fs/nfsd/nfssvc.c
> > @@ -971,7 +971,7 @@ nfsd(void *vrqstp)
> >  	/*
> >  	 * The main request loop
> >  	 */
> > -	while (!kthread_should_stop()) {
> > +	while (!svc_thread_should_stop(rqstp)) {
> >  		/* Update sv_maxconn if it has changed */
> >  		rqstp->rq_server->sv_maxconn = nn->max_connections;
> >  
> > diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
> > index 0f016d69c996..9f565416d186 100644
> > --- a/include/linux/lockd/lockd.h
> > +++ b/include/linux/lockd/lockd.h
> > @@ -282,7 +282,7 @@ __be32		  nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
> >  			struct nlm_host *, struct nlm_lock *,
> >  			struct nlm_lock *, struct nlm_cookie *);
> >  __be32		  nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *);
> > -void		  nlmsvc_retry_blocked(void);
> > +void		  nlmsvc_retry_blocked(struct svc_rqst *rqstp);
> >  void		  nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
> >  					nlm_host_match_fn_t match);
> >  void		  nlmsvc_grant_reply(struct nlm_cookie *, __be32);
> > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> > index 15d468d852b5..ea3ce1315416 100644
> > --- a/include/linux/sunrpc/svc.h
> > +++ b/include/linux/sunrpc/svc.h
> > @@ -51,6 +51,8 @@ struct svc_pool {
> >  enum {
> >  	SP_TASK_PENDING,	/* still work to do even if no xprt is queued */
> >  	SP_CONGESTED,		/* all threads are busy, none idle */
> > +	SP_NEED_VICTIM,		/* One thread needs to agree to exit */
> > +	SP_VICTIM_REMAINS,	/* One thread needs to actually exit */
> >  };
> >  
> >  /*
> > @@ -275,7 +277,7 @@ enum {
> >  	RQ_DROPME,	/* drop current reply */
> >  	RQ_SPLICE_OK,	/* turned off in gss privacy to prevent encrypting page
> >  			 * cache pages */
> > -	RQ_VICTIM,	/* about to be shut down */
> > +	RQ_VICTIM,	/* Have agreed to shut down */
> >  	RQ_BUSY,	/* request is busy */
> >  	RQ_DATA,	/* request has data */
> >  };
> > @@ -315,6 +317,24 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst)
> >  	return (struct sockaddr *) &rqst->rq_daddr;
> >  }
> >  
> > +/**
> > + * svc_thread_should_stop - check if this thread should stop
> > + * @rqstp: the thread that might need to stop
> > + *
> > + * To stop an svc thread, the pool flags SP_NEED_VICTIM and SP_VICTIM_REMAINS
> > + * are set.  The first thread which sees SP_NEED_VICTIM clears it, becoming
> > + * the victim, using this function.  It should then promptly call
> > + * svc_exit_thread() which completes the process, clearing SP_VICTIM_REMAINS
> > + * so the task waiting for a thread to exit can wake and continue.
> > + */
> > +static inline bool svc_thread_should_stop(struct svc_rqst *rqstp)
> > +{
> > +	if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags))
> > +		set_bit(RQ_VICTIM, &rqstp->rq_flags);
> > +
> > +	return test_bit(RQ_VICTIM, &rqstp->rq_flags);
> > +}
> > +
> >  struct svc_deferred_req {
> >  	u32			prot;	/* protocol (UDP or TCP) */
> >  	struct svc_xprt		*xprt;
> > diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
> > index 60c8e03268d4..c79375e37dc2 100644
> > --- a/include/trace/events/sunrpc.h
> > +++ b/include/trace/events/sunrpc.h
> > @@ -2041,8 +2041,9 @@ TRACE_EVENT(svc_xprt_enqueue,
> >  #define show_svc_pool_flags(x)						\
> >  	__print_flags(x, "|",						\
> >  		{ BIT(SP_TASK_PENDING),		"TASK_PENDING" },	\
> > -		{ BIT(SP_CONGESTED),		"CONGESTED" })
> > -
> > +		{ BIT(SP_CONGESTED),		"CONGESTED" },		\
> > +		{ BIT(SP_NEED_VICTIM),		"NEED_VICTIM" },	\
> > +		{ BIT(SP_VICTIM_REMAINS),	"VICTIM_REMAINS" })
> >  DECLARE_EVENT_CLASS(svc_pool_scheduler_class,
> >  	TP_PROTO(
> >  		const struct svc_rqst *rqstp
> > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> > index 56b4a88776d5..b18175ef74ec 100644
> > --- a/net/sunrpc/svc.c
> > +++ b/net/sunrpc/svc.c
> > @@ -731,19 +731,22 @@ svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
> >  	return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools];
> >  }
> >  
> > -static struct task_struct *
> > +static struct svc_pool *
> >  svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
> >  {
> >  	unsigned int i;
> > -	struct task_struct *task = NULL;
> >  
> >  	if (pool != NULL) {
> >  		spin_lock_bh(&pool->sp_lock);
> > +		if (pool->sp_nrthreads)
> > +			goto found_pool;
> > +		spin_unlock_bh(&pool->sp_lock);
> > +		return NULL;
> >  	} else {
> >  		for (i = 0; i < serv->sv_nrpools; i++) {
> >  			pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
> >  			spin_lock_bh(&pool->sp_lock);
> > -			if (!list_empty(&pool->sp_all_threads))
> > +			if (pool->sp_nrthreads)
> >  				goto found_pool;
> >  			spin_unlock_bh(&pool->sp_lock);
> >  		}
> > @@ -751,16 +754,10 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *stat
> >  	}
> >  
> >  found_pool:
> > -	if (!list_empty(&pool->sp_all_threads)) {
> > -		struct svc_rqst *rqstp;
> > -
> > -		rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
> > -		set_bit(RQ_VICTIM, &rqstp->rq_flags);
> > -		list_del_rcu(&rqstp->rq_all);
> > -		task = rqstp->rq_task;
> > -	}
> > +	set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> > +	set_bit(SP_NEED_VICTIM, &pool->sp_flags);
> >  	spin_unlock_bh(&pool->sp_lock);
> > -	return task;
> > +	return pool;
> >  }
> >  
> >  static int
> > @@ -801,18 +798,16 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
> >  static int
> >  svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
> >  {
> > -	struct svc_rqst	*rqstp;
> > -	struct task_struct *task;
> >  	unsigned int state = serv->sv_nrthreads-1;
> > +	struct svc_pool *vpool;
> >  
> >  	do {
> > -		task = svc_pool_victim(serv, pool, &state);
> > -		if (task == NULL)
> > +		vpool = svc_pool_victim(serv, pool, &state);
> > +		if (!vpool)
> >  			break;
> > -		rqstp = kthread_data(task);
> > -		/* Did we lose a race to svo_function threadfn? */
> > -		if (kthread_stop(task) == -EINTR)
> > -			svc_exit_thread(rqstp);
> > +		svc_pool_wake_idle_thread(serv, vpool);
> > +		wait_on_bit(&vpool->sp_flags, SP_VICTIM_REMAINS,
> > +			    TASK_IDLE);
> 
> I'm not sure about this bit. Previously (AFAICT) we didn't shut down the
> threads serially. With this change, we will be. Granted we only have to
> wait until SP_VICTIM_REMAINS is clear. Does that happen early enough in
> the shutdown process that you aren't worried about this?

If by "previously" you mean "before v5.17-rc1~75^2~42", then you are
correct.
When we used send_sig() to stop nfsd threads, we sent the signals in a
tight loop and didn't wait.

Since then we use kthread_stop(), which is synchronous.  It sets a flag,
wakes the thread, and waits on a completion.

With this patch the clear_and_wake_up_bit() happens slightly *before*
the complete() which we currently wait for rather late in exit()
handling (complete_vfork_done(), I think).
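
To make the ordering concrete, here is a rough sketch of the two sides,
condensed from the patch above (not literal code, just the relevant steps):

	/* controller (svc_stop_kthreads): mark the pool, wake a thread, wait */
	set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
	set_bit(SP_NEED_VICTIM, &pool->sp_flags);
	svc_pool_wake_idle_thread(serv, vpool);
	wait_on_bit(&vpool->sp_flags, SP_VICTIM_REMAINS, TASK_IDLE);

	/* victim: the first thread to see SP_NEED_VICTIM claims it ... */
	if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags))
		set_bit(RQ_VICTIM, &rqstp->rq_flags);

	/* ... and in svc_exit_thread() it releases the waiter before the
	 * task itself finishes exiting, which is where kthread_stop()
	 * would have waited for the completion:
	 */
	if (svc_thread_should_stop(rqstp))
		clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags);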

Thanks for the review - here and elsewhere.

NeilBrown


> 
> >  		nrservs++;
> >  	} while (nrservs < 0);
> >  	return 0;
> > @@ -932,8 +927,6 @@ svc_exit_thread(struct svc_rqst *rqstp)
> >  
> >  	spin_lock_bh(&pool->sp_lock);
> >  	pool->sp_nrthreads--;
> > -	if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags))
> > -		list_del_rcu(&rqstp->rq_all);
> >  	spin_unlock_bh(&pool->sp_lock);
> >  
> >  	spin_lock_bh(&serv->sv_lock);
> > @@ -941,6 +934,8 @@ svc_exit_thread(struct svc_rqst *rqstp)
> >  	spin_unlock_bh(&serv->sv_lock);
> >  	svc_sock_update_bufs(serv);
> >  
> > +	if (svc_thread_should_stop(rqstp))
> > +		clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> >  	svc_rqst_free(rqstp);
> >  
> >  	svc_put(serv);
> > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> > index 4fdf1aaa514b..948605e7043b 100644
> > --- a/net/sunrpc/svc_xprt.c
> > +++ b/net/sunrpc/svc_xprt.c
> > @@ -595,8 +595,6 @@ void svc_wake_up(struct svc_serv *serv)
> >  		smp_wmb();
> >  		return;
> >  	}
> > -
> > -	trace_svc_wake_up(rqstp);
> >  }
> >  EXPORT_SYMBOL_GPL(svc_wake_up);
> >  
> > @@ -688,7 +686,7 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp)
> >  			continue;
> >  
> >  		set_current_state(TASK_IDLE);
> > -		if (kthread_should_stop()) {
> > +		if (svc_thread_should_stop(rqstp)) {
> >  			set_current_state(TASK_RUNNING);
> >  			return false;
> >  		}
> > @@ -726,7 +724,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
> >  		return false;
> >  
> >  	/* are we shutting down? */
> > -	if (kthread_should_stop())
> > +	if (svc_thread_should_stop(rqstp))
> >  		return false;
> >  
> >  	/* are we freezing? */
> > @@ -787,7 +785,7 @@ static void svc_wait_for_work(struct svc_rqst *rqstp)
> >  	}
> >  #endif
> >  
> > -	if (!kthread_should_stop())
> > +	if (!svc_thread_should_stop(rqstp))
> >  		percpu_counter_inc(&pool->sp_threads_no_work);
> >  	return;
> >  out_found:
> > 
> > 
> 
> -- 
> Jeff Layton <jlayton@xxxxxxxxxx>
> 
