Re: [PATCH 04/10] SUNRPC: change service idle list to be an llist

On Wed, Aug 16, 2023 at 08:44:39AM +1000, NeilBrown wrote:
> On Wed, 16 Aug 2023, Chuck Lever wrote:
> > On Tue, Aug 15, 2023 at 11:54:20AM +1000, NeilBrown wrote:
> > > With an llist we don't need to take a lock to add a thread to the list,
> > > though we still need a lock to remove it.  That will go in the next
> > > patch.
> > > 
> > > Unlike doubly-linked lists, a thread cannot reliably remove itself from
> > > the list.  Only the first thread on the list can be removed, and which
> > > thread that is can change asynchronously.  So some care is needed.
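
For readers unfamiliar with llist, here is a minimal userspace analogue
of the contract being relied on (a sketch, not the kernel's
<linux/llist.h>): adding is a lock-free cmpxchg on the list head,
removal is only ever "take the current first node", and removers must
be serialized, which is why this patch still takes sp_lock around
llist_del_first().

    /* Userspace sketch of the llist contract; not kernel code. */
    #include <stdatomic.h>
    #include <pthread.h>

    struct lnode { struct lnode *next; };
    struct lhead { _Atomic(struct lnode *) first; };

    static pthread_mutex_t del_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Lock-free: any number of threads may add concurrently. */
    static void lpush(struct lhead *h, struct lnode *n)
    {
            struct lnode *old = atomic_load(&h->first);

            do {
                    n->next = old;  /* redone if the cmpxchg loses a race */
            } while (!atomic_compare_exchange_weak(&h->first, &old, n));
    }

    /* Only the first node can be taken, and concurrent removers must
     * be serialized; pushers may still race with us, hence the loop.
     */
    static struct lnode *lpop_first(struct lhead *h)
    {
            struct lnode *n;

            pthread_mutex_lock(&del_lock);
            n = atomic_load(&h->first);
            while (n && !atomic_compare_exchange_weak(&h->first, &n, n->next))
                    ;       /* a pusher won; retry with the new first node */
            pthread_mutex_unlock(&del_lock);
            return n;
    }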
> > > 
> > > We already check if there is pending work to do, so we are unlikely to
> > > add ourselves to the idle list and then want to remove ourselves again.
> > > 
> > > If we DO find something needs to be done after adding ourselves to the
> > > list, we simply wake up the first thread on the list.  If that was us,
> > > we successfully removed ourselves and can continue.  If it was some
> > > other thread, they will do the work that needs to be done.  We can
> > > safely sleep until woken.
> > > 
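In outline, the protocol this describes looks like the following (a
sketch of the svc_rqst_wait_for_work() hunk below, not the diff
verbatim):

    llist_add(&rqstp->rq_idle, &pool->sp_idle_threads);
    if (!rqst_should_sleep(rqstp))
            /* Work raced in after we queued ourselves and nothing was
             * woken.  Pop *some* first thread: if it is us, we simply
             * don't sleep; if it is another thread, it does the work.
             */
            svc_pool_wake_idle_thread(pool);
    while (!svc_thread_busy(rqstp))     /* until someone pops us */
            schedule();
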
> > > We also remove the test on freezing() from rqst_should_sleep().  Instead
> > > we always try_to_freeze() before scheduling, which is needed as we now
> > > schedule() in a loop waiting to be removed from the idle queue.
> > > 
> > > Signed-off-by: NeilBrown <neilb@xxxxxxx>
> > > ---
> > >  include/linux/sunrpc/svc.h | 14 ++++++-----
> > >  net/sunrpc/svc.c           | 13 +++++-----
> > >  net/sunrpc/svc_xprt.c      | 50 +++++++++++++++++++-------------------
> > >  3 files changed, 40 insertions(+), 37 deletions(-)
> > > 
> > > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> > > index 22b3018ebf62..5216f95411e3 100644
> > > --- a/include/linux/sunrpc/svc.h
> > > +++ b/include/linux/sunrpc/svc.h
> > > @@ -37,7 +37,7 @@ struct svc_pool {
> > >  	struct list_head	sp_sockets;	/* pending sockets */
> > >  	unsigned int		sp_nrthreads;	/* # of threads in pool */
> > >  	struct list_head	sp_all_threads;	/* all server threads */
> > > -	struct list_head	sp_idle_threads; /* idle server threads */
> > > +	struct llist_head	sp_idle_threads; /* idle server threads */
> > >  
> > >  	/* statistics on pool operation */
> > >  	struct percpu_counter	sp_messages_arrived;
> > > @@ -186,7 +186,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
> > >   */
> > >  struct svc_rqst {
> > >  	struct list_head	rq_all;		/* all threads list */
> > > -	struct list_head	rq_idle;	/* On the idle list */
> > > +	struct llist_node	rq_idle;	/* On the idle list */
> > >  	struct rcu_head		rq_rcu_head;	/* for RCU deferred kfree */
> > >  	struct svc_xprt *	rq_xprt;	/* transport ptr */
> > >  
> > > @@ -270,22 +270,24 @@ enum {
> > >   * svc_thread_set_busy - mark a thread as busy
> > >   * @rqstp: the thread which is now busy
> > >   *
> > > - * If rq_idle is "empty", the thread must be busy.
> > > + * By convention a thread is busy if rq_idle.next points to rq_idle.
> > > + * This ensures it is not on the idle list.
> > >   */
> > >  static inline void svc_thread_set_busy(struct svc_rqst *rqstp)
> > >  {
> > > -	INIT_LIST_HEAD(&rqstp->rq_idle);
> > > +	rqstp->rq_idle.next = &rqstp->rq_idle;
> > >  }
> > >  
> > >  /**
> > >   * svc_thread_busy - check if a thread is busy
> > >   * @rqstp: the thread which might be busy
> > >   *
> > > - * If rq_idle is "empty", the thread must be busy.
> > > + * By convention a thread is busy if rq_idle.next points to rq_idle.
> > > + * This ensures it is not on the idle list.
> > >   */
> > >  static inline bool svc_thread_busy(struct svc_rqst *rqstp)
> > >  {
> > > -	return list_empty(&rqstp->rq_idle);
> > > +	return rqstp->rq_idle.next == &rqstp->rq_idle;
> > >  }
> > >  
> > >  #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
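
The self-pointing sentinel above deserves a note: llist_add() always
overwrites ->next with the old list head, and a node can never be its
own successor while it is on the list, so "rq_idle.next == &rq_idle"
unambiguously means "not on the idle list".  And since llist_del_first()
leaves the removed node's ->next pointing into the list, whoever removes
a thread must restore the sentinel by hand, which is exactly what the
svc_pool_wake_idle_thread() hunk below does with svc_thread_set_busy().
A sketch of the invariant, using hypothetical helper names:

    /* Hypothetical helpers mirroring svc_thread_set_busy()/_busy(). */
    struct llist_node { struct llist_node *next; };

    static inline void mark_off_list(struct llist_node *n)
    {
            n->next = n;            /* never true for a queued node */
    }

    static inline int is_off_list(const struct llist_node *n)
    {
            return n->next == n;
    }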
> > > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> > > index 051f08c48418..addbf28ea50a 100644
> > > --- a/net/sunrpc/svc.c
> > > +++ b/net/sunrpc/svc.c
> > > @@ -510,7 +510,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
> > >  		pool->sp_id = i;
> > >  		INIT_LIST_HEAD(&pool->sp_sockets);
> > >  		INIT_LIST_HEAD(&pool->sp_all_threads);
> > > -		INIT_LIST_HEAD(&pool->sp_idle_threads);
> > > +		init_llist_head(&pool->sp_idle_threads);
> > >  		spin_lock_init(&pool->sp_lock);
> > >  
> > >  		percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL);
> > > @@ -701,15 +701,16 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
> > >  void svc_pool_wake_idle_thread(struct svc_pool *pool)
> > >  {
> > >  	struct svc_rqst	*rqstp;
> > > +	struct llist_node *ln;
> > >  
> > >  	rcu_read_lock();
> > >  	spin_lock_bh(&pool->sp_lock);
> > > -	rqstp = list_first_entry_or_null(&pool->sp_idle_threads,
> > > -					 struct svc_rqst, rq_idle);
> > > -	if (rqstp)
> > > -		list_del_init(&rqstp->rq_idle);
> > > +	ln = llist_del_first(&pool->sp_idle_threads);
> > >  	spin_unlock_bh(&pool->sp_lock);
> > > -	if (rqstp) {
> > > +	if (ln) {
> > > +		rqstp = llist_entry(ln, struct svc_rqst, rq_idle);
> > > +		svc_thread_set_busy(rqstp);
> > > +
> > >  		WRITE_ONCE(rqstp->rq_qtime, ktime_get());
> > >  		wake_up_process(rqstp->rq_task);
> > >  		rcu_read_unlock();
> > > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> > > index fa0d854a5596..7cb71effda0b 100644
> > > --- a/net/sunrpc/svc_xprt.c
> > > +++ b/net/sunrpc/svc_xprt.c
> > > @@ -715,10 +715,6 @@ rqst_should_sleep(struct svc_rqst *rqstp)
> > >  	if (svc_thread_should_stop(rqstp))
> > >  		return false;
> > >  
> > > -	/* are we freezing? */
> > > -	if (freezing(current))
> > > -		return false;
> > > -
> > >  #if defined(CONFIG_SUNRPC_BACKCHANNEL)
> > >  	if (svc_is_backchannel(rqstp)) {
> > >  		if (!list_empty(&rqstp->rq_server->sv_cb_list))
> > > @@ -735,29 +731,32 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp)
> > >  
> > >  	if (rqst_should_sleep(rqstp)) {
> > >  		set_current_state(TASK_IDLE);
> > > -		spin_lock_bh(&pool->sp_lock);
> > > -		list_add(&rqstp->rq_idle, &pool->sp_idle_threads);
> > > -		spin_unlock_bh(&pool->sp_lock);
> > > +		llist_add(&rqstp->rq_idle, &pool->sp_idle_threads);
> > > +
> > > +		if (unlikely(!rqst_should_sleep(rqstp)))
> > > +			/* maybe there were no idle threads when some work
> > > +			 * became ready and so nothing was woken.  We've just
> > > +			 * become idle so someone can do the work - maybe us.
> > > +			 * But we cannot reliably remove ourselves from the
> > > +			 * idle list - we can only remove the first task which
> > > +			 * might be us, and might not.
> > > +			 * So remove the first thread and wake it, then
> > > +			 * schedule().  If it was us, we won't sleep.  If it
> > > +			 * was some other thread, it will do the work.
> > > +			 */
> > > +			svc_pool_wake_idle_thread(pool);
> > >  
> > > -		/* Need to check should_sleep() again after
> > > -		 * setting task state in case a wakeup happened
> > > -		 * between testing and setting.
> > > +		/* We mustn't continue while on the idle list, and we
> > > +		 * cannot remove ourselves reliably.  The only "work"
> > > +		 * we can do while on the idle list is to freeze.
> > > +		 * So loop until someone removes us.
> > >  		 */
> > > -		if (rqst_should_sleep(rqstp)) {
> > > +		while (!svc_thread_busy(rqstp)) {
> > > +			try_to_freeze();
> > 
> > For testing, I've applied up to this patch. When nfsd is started
> > I now get this splat:
> > 
> > do not call blocking ops when !TASK_RUNNING; state=402 set at [<000000001e3d6995>] svc_recv+0x40/0x252 [sunrpc]
> 
> Thanks.  I didn't have the right CONFIG options to trigger that warning.
> I do now.
> I'm not surprised I got something wrong with freezing.  I did some
> research and now I see the part of freezing that I was missing.
> This incremental patch
> 
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index 7cb71effda0b..81327001e074 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -730,7 +730,7 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp)
>  	struct svc_pool *pool = rqstp->rq_pool;
>  
>  	if (rqst_should_sleep(rqstp)) {
> -		set_current_state(TASK_IDLE);
> +		set_current_state(TASK_IDLE | TASK_FREEZABLE);
>  		llist_add(&rqstp->rq_idle, &pool->sp_idle_threads);
>  
>  		if (unlikely(!rqst_should_sleep(rqstp)))
> @@ -752,9 +752,8 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp)
>  		 * So loop until someone removes us.
>  		 */
>  		while (!svc_thread_busy(rqstp)) {
> -			try_to_freeze();
>  			schedule();
> -			set_current_state(TASK_IDLE);
> +			set_current_state(TASK_IDLE | TASK_FREEZABLE);
>  		}
>  		__set_current_state(TASK_RUNNING);
>  	} else {
> 
> 
> fixes that issue.

Confirmed.
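
My reading of why this works (hedged, not authoritative):
try_to_freeze() carries a might_sleep() annotation and can call
__refrigerator(), so it must not run after set_current_state(TASK_IDLE)
has put the task in a non-running state; that is precisely the splat
quoted below.  With TASK_FREEZABLE set in the sleep state, the freezer
can handle a task blocked in schedule() without it ever calling
try_to_freeze(), so with the incremental patch above applied the wait
loop becomes simply:

    set_current_state(TASK_IDLE | TASK_FREEZABLE);
    llist_add(&rqstp->rq_idle, &pool->sp_idle_threads);
    ...
    while (!svc_thread_busy(rqstp)) {
            schedule();             /* freezer may act on us here */
            set_current_state(TASK_IDLE | TASK_FREEZABLE);
    }
    __set_current_state(TASK_RUNNING);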


> "TASK_FREEZABLE" was the missing bit.  I'll need to
> look back at the other patches and probably introduce this earlier.

Also, it makes subsequent patches fail to apply. Can you send an
updated series?


> > WARNING: CPU: 3 PID: 1228 at kernel/sched/core.c:10112 __might_sleep+0x52/0x6a
> > Modules linked in: 8021q garp stp mrp llc rfkill rpcrdma rdma_ucm ib_srpt ib_isert iscsi_target_mod snd_hda_codec_realtek target_core_mod intel>
> > CPU: 3 PID: 1228 Comm: lockd Not tainted 6.5.0-rc6-00060-gd10a6b1ad2a1 #1
> > Hardware name: Supermicro X10SRA-F/X10SRA-F, BIOS 2.0b 06/12/2017
> > RIP: 0010:__might_sleep+0x52/0x6a
> > Code: 00 00 74 28 80 3d 45 40 d5 01 00 75 1f 48 8b 90 f0 1a 00 00 48 c7 c7 b3 d6 49 9b c6 05 2e 40 d5 01 01 48 89 d1 e8 e6 a2 fc ff <0f> 0b 44 >
> > RSP: 0018:ffffb3e3836abe68 EFLAGS: 00010286
> > RAX: 000000000000006f RBX: ffffffffc0c20599 RCX: 0000000000000027
> > RDX: 0000000000000027 RSI: 00000000ffffdfff RDI: 0000000000000001
> > RBP: ffffb3e3836abe78 R08: 0000000000000000 R09: ffffb3e380e70020
> > R10: 0000000000000000 R11: ffffa0ae94aa4c00 R12: 0000000000000035
> > R13: ffffa0ae94bfa040 R14: ffffa0ae9e85c010 R15: ffffa0ae94bfa040
> > FS:  0000000000000000(0000) GS:ffffa0bdbfd80000(0000) knlGS:0000000000000000
> > CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > CR2: 00007f59c2611760 CR3: 000000069ec34006 CR4: 00000000003706e0
> > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > Call Trace:
> >  <TASK>
> >  ? show_regs+0x5d/0x64
> >  ? __might_sleep+0x52/0x6a
> >  ? __warn+0xab/0x132
> >  ? report_bug+0xd0/0x144
> >  ? __might_sleep+0x52/0x6a
> >  ? handle_bug+0x45/0x74
> >  ? exc_invalid_op+0x18/0x68
> >  ? asm_exc_invalid_op+0x1b/0x20
> >  ? __might_sleep+0x52/0x6a
> >  ? __might_sleep+0x52/0x6a
> >  try_to_freeze.isra.0+0x15/0x3d [sunrpc]
> >  svc_recv+0x97/0x252 [sunrpc]
> >  ? __pfx_lockd+0x10/0x10 [lockd]
> >  lockd+0x6b/0xda [lockd]
> >  kthread+0x10d/0x115
> >  ? __pfx_kthread+0x10/0x10
> >  ret_from_fork+0x2a/0x43
> >  ? __pfx_kthread+0x10/0x10
> >  ret_from_fork_asm+0x1b/0x30
> >  </TASK>
> > 
> > 
> > >  			schedule();
> > > -		} else {
> > > -			__set_current_state(TASK_RUNNING);
> > > -			cond_resched();
> > > -		}
> > > -
> > > -		/* We *must* be removed from the list before we can continue.
> > > -		 * If we were woken, this is already done
> > > -		 */
> > > -		if (!svc_thread_busy(rqstp)) {
> > > -			spin_lock_bh(&pool->sp_lock);
> > > -			list_del_init(&rqstp->rq_idle);
> > > -			spin_unlock_bh(&pool->sp_lock);
> > > +			set_current_state(TASK_IDLE);
> > >  		}
> > > +		__set_current_state(TASK_RUNNING);
> > >  	} else {
> > >  		cond_resched();
> > >  	}
> > > @@ -870,9 +869,10 @@ void svc_recv(struct svc_rqst *rqstp)
> > >  		struct svc_xprt *xprt = rqstp->rq_xprt;
> > >  
> > >  		/* Normally we will wait up to 5 seconds for any required
> > > -		 * cache information to be provided.
> > > +		 * cache information to be provided.  When there are no
> > > +		 * idle threads, we reduce the wait time.
> > >  		 */
> > > -		if (!list_empty(&pool->sp_idle_threads))
> > > +		if (pool->sp_idle_threads.first)
> > >  			rqstp->rq_chandle.thread_wait = 5 * HZ;
> > >  		else
> > >  			rqstp->rq_chandle.thread_wait = 1 * HZ;
> > > -- 
> > > 2.40.1
> > > 
> > 
> > -- 
> > Chuck Lever
> > 
> 

-- 
Chuck Lever


