On Wed, Aug 16, 2023 at 08:44:39AM +1000, NeilBrown wrote: > On Wed, 16 Aug 2023, Chuck Lever wrote: > > On Tue, Aug 15, 2023 at 11:54:20AM +1000, NeilBrown wrote: > > > With an llist we don't need to take a lock to add a thread to the list, > > > though we still need a lock to remove it. That will go in the next > > > patch. > > > > > > Unlike double-linked lists, a thread cannot reliably remove itself from > > > the list. Only the first thread can be removed, and that can change > > > asynchronously. So some care is needed. > > > > > > We already check if there is pending work to do, so we are unlikely to > > > add ourselves to the idle list and then want to remove ourselves again. > > > > > > If we DO find something needs to be done after adding ourselves to the > > > list, we simply wake up the first thread on the list. If that was us, > > > we successfully removed ourselves and can continue. If it was some > > > other thread, they will do the work that needs to be done. We can > > > safely sleep until woken. > > > > > > We also remove the test on freezing() from rqst_should_sleep(). Instead > > > we always try_to_freeze() before scheduling, which is needed as we now > > > schedule() in a loop waiting to be removed from the idle queue. 
> > > > > > Signed-off-by: NeilBrown <neilb@xxxxxxx> > > > --- > > > include/linux/sunrpc/svc.h | 14 ++++++----- > > > net/sunrpc/svc.c | 13 +++++----- > > > net/sunrpc/svc_xprt.c | 50 +++++++++++++++++++------------------- > > > 3 files changed, 40 insertions(+), 37 deletions(-) > > > > > > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h > > > index 22b3018ebf62..5216f95411e3 100644 > > > --- a/include/linux/sunrpc/svc.h > > > +++ b/include/linux/sunrpc/svc.h > > > @@ -37,7 +37,7 @@ struct svc_pool { > > > struct list_head sp_sockets; /* pending sockets */ > > > unsigned int sp_nrthreads; /* # of threads in pool */ > > > struct list_head sp_all_threads; /* all server threads */ > > > - struct list_head sp_idle_threads; /* idle server threads */ > > > + struct llist_head sp_idle_threads; /* idle server threads */ > > > > > > /* statistics on pool operation */ > > > struct percpu_counter sp_messages_arrived; > > > @@ -186,7 +186,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); > > > */ > > > struct svc_rqst { > > > struct list_head rq_all; /* all threads list */ > > > - struct list_head rq_idle; /* On the idle list */ > > > + struct llist_node rq_idle; /* On the idle list */ > > > struct rcu_head rq_rcu_head; /* for RCU deferred kfree */ > > > struct svc_xprt * rq_xprt; /* transport ptr */ > > > > > > @@ -270,22 +270,24 @@ enum { > > > * svc_thread_set_busy - mark a thread as busy > > > * @rqstp: the thread which is now busy > > > * > > > - * If rq_idle is "empty", the thread must be busy. > > > + * By convention a thread is busy if rq_idle.next points to rq_idle. > > > + * This ensures it is not on the idle list. 
> > > */ > > > static inline void svc_thread_set_busy(struct svc_rqst *rqstp) > > > { > > > - INIT_LIST_HEAD(&rqstp->rq_idle); > > > + rqstp->rq_idle.next = &rqstp->rq_idle; > > > } > > > > > > /** > > > * svc_thread_busy - check if a thread is busy > > > * @rqstp: the thread which might be busy > > > * > > > - * If rq_idle is "empty", the thread must be busy. > > > + * By convention a thread is busy if rq_idle.next points to rq_idle. > > > + * This ensures it is not on the idle list. > > > */ > > > static inline bool svc_thread_busy(struct svc_rqst *rqstp) > > > { > > > - return list_empty(&rqstp->rq_idle); > > > + return rqstp->rq_idle.next == &rqstp->rq_idle; > > > } > > > > > > #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net) > > > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c > > > index 051f08c48418..addbf28ea50a 100644 > > > --- a/net/sunrpc/svc.c > > > +++ b/net/sunrpc/svc.c > > > @@ -510,7 +510,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, > > > pool->sp_id = i; > > > INIT_LIST_HEAD(&pool->sp_sockets); > > > INIT_LIST_HEAD(&pool->sp_all_threads); > > > - INIT_LIST_HEAD(&pool->sp_idle_threads); > > > + init_llist_head(&pool->sp_idle_threads); > > > spin_lock_init(&pool->sp_lock); > > > > > > percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL); > > > @@ -701,15 +701,16 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) > > > void svc_pool_wake_idle_thread(struct svc_pool *pool) > > > { > > > struct svc_rqst *rqstp; > > > + struct llist_node *ln; > > > > > > rcu_read_lock(); > > > spin_lock_bh(&pool->sp_lock); > > > - rqstp = list_first_entry_or_null(&pool->sp_idle_threads, > > > - struct svc_rqst, rq_idle); > > > - if (rqstp) > > > - list_del_init(&rqstp->rq_idle); > > > + ln = llist_del_first(&pool->sp_idle_threads); > > > spin_unlock_bh(&pool->sp_lock); > > > - if (rqstp) { > > > + if (ln) { > > > + rqstp = llist_entry(ln, struct svc_rqst, rq_idle); > > 
> > + svc_thread_set_busy(rqstp); > > > + > > > WRITE_ONCE(rqstp->rq_qtime, ktime_get()); > > > wake_up_process(rqstp->rq_task); > > > rcu_read_unlock(); > > > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c > > > index fa0d854a5596..7cb71effda0b 100644 > > > --- a/net/sunrpc/svc_xprt.c > > > +++ b/net/sunrpc/svc_xprt.c > > > @@ -715,10 +715,6 @@ rqst_should_sleep(struct svc_rqst *rqstp) > > > if (svc_thread_should_stop(rqstp)) > > > return false; > > > > > > - /* are we freezing? */ > > > - if (freezing(current)) > > > - return false; > > > - > > > #if defined(CONFIG_SUNRPC_BACKCHANNEL) > > > if (svc_is_backchannel(rqstp)) { > > > if (!list_empty(&rqstp->rq_server->sv_cb_list)) > > > @@ -735,29 +731,32 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp) > > > > > > if (rqst_should_sleep(rqstp)) { > > > set_current_state(TASK_IDLE); > > > - spin_lock_bh(&pool->sp_lock); > > > - list_add(&rqstp->rq_idle, &pool->sp_idle_threads); > > > - spin_unlock_bh(&pool->sp_lock); > > > + llist_add(&rqstp->rq_idle, &pool->sp_idle_threads); > > > + > > > + if (unlikely(!rqst_should_sleep(rqstp))) > > > + /* maybe there were no idle threads when some work > > > + * became ready and so nothing was woken. We've just > > > + * become idle so someone can do the work - maybe us. > > > + * But we cannot reliably remove ourselves from the > > > + * idle list - we can only remove the first task which > > > + * might be us, and might not. > > > + * So remove and wake it, then schedule(). If it was > > > + * us, we won't sleep. If it is some other thread, they > > > + * will do the work. > > > + */ > > > + svc_pool_wake_idle_thread(pool); > > > > > > - /* Need to check should_sleep() again after > > > - * setting task state in case a wakeup happened > > > - * between testing and setting. > > > + /* We mustn't continue while on the idle list, and we > > > + * cannot remove ourselves reliably. The only "work" > > > + * we can do while on the idle list is to freeze. 
> > > + * So loop until someone removes us > > > */ > > > - if (rqst_should_sleep(rqstp)) { > > > + while (!svc_thread_busy(rqstp)) { > > > + try_to_freeze(); > > > > For testing, I've applied up to this patch. When nfsd is started > > I now get this splat: > > > > do not call blocking ops when !TASK_RUNNING; state=402 set at [<000000001e3d6995>] svc_recv+0x40/0x252 [sunrpc] > > Thanks. I didn't have the right CONFIG options to trigger that warning. > I do now. > I'm not surprised I got something wrong with freezing. I did some > research and now I see the part of freezing that I was missing. > This incremental patch > > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c > index 7cb71effda0b..81327001e074 100644 > --- a/net/sunrpc/svc_xprt.c > +++ b/net/sunrpc/svc_xprt.c > @@ -730,7 +730,7 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp) > struct svc_pool *pool = rqstp->rq_pool; > > if (rqst_should_sleep(rqstp)) { > - set_current_state(TASK_IDLE); > + set_current_state(TASK_IDLE | TASK_FREEZABLE); > llist_add(&rqstp->rq_idle, &pool->sp_idle_threads); > > if (unlikely(!rqst_should_sleep(rqstp))) > @@ -752,9 +752,8 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp) > * So loop until someone removes us > */ > while (!svc_thread_busy(rqstp)) { > - try_to_freeze(); > schedule(); > - set_current_state(TASK_IDLE); > + set_current_state(TASK_IDLE | TASK_FREEZABLE); > } > __set_current_state(TASK_RUNNING); > } else { > > > fixes that issue. Confirmed. > "TASK_FREEZABLE" was the missing bit. I'll need to > look back at the other patches and probably introduce this earlier. Also, it makes subsequent patches fail to apply. Can you send an updated series? 
> > WARNING: CPU: 3 PID: 1228 at kernel/sched/core.c:10112 __might_sleep+0x52/0x6a > > Modules linked in: 8021q garp stp mrp llc rfkill rpcrdma rdma_ucm ib_srpt ib_isert iscsi_target_mod snd_hda_codec_realtek target_core_mod intel> > > CPU: 3 PID: 1228 Comm: lockd Not tainted 6.5.0-rc6-00060-gd10a6b1ad2a1 #1 > > Hardware name: Supermicro X10SRA-F/X10SRA-F, BIOS 2.0b 06/12/2017 > > RIP: 0010:__might_sleep+0x52/0x6a > > Code: 00 00 74 28 80 3d 45 40 d5 01 00 75 1f 48 8b 90 f0 1a 00 00 48 c7 c7 b3 d6 49 9b c6 05 2e 40 d5 01 01 48 89 d1 e8 e6 a2 fc ff <0f> 0b 44 > > > RSP: 0018:ffffb3e3836abe68 EFLAGS: 00010286 > > RAX: 000000000000006f RBX: ffffffffc0c20599 RCX: 0000000000000027 > > RDX: 0000000000000027 RSI: 00000000ffffdfff RDI: 0000000000000001 > > RBP: ffffb3e3836abe78 R08: 0000000000000000 R09: ffffb3e380e70020 > > R10: 0000000000000000 R11: ffffa0ae94aa4c00 R12: 0000000000000035 > > R13: ffffa0ae94bfa040 R14: ffffa0ae9e85c010 R15: ffffa0ae94bfa040 > > FS: 0000000000000000(0000) GS:ffffa0bdbfd80000(0000) knlGS:0000000000000000 > > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > > CR2: 00007f59c2611760 CR3: 000000069ec34006 CR4: 00000000003706e0 > > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > > DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > > Call Trace: > > <TASK> > > ? show_regs+0x5d/0x64 > > ? __might_sleep+0x52/0x6a > > ? __warn+0xab/0x132 > > ? report_bug+0xd0/0x144 > > ? __might_sleep+0x52/0x6a > > ? handle_bug+0x45/0x74 > > ? exc_invalid_op+0x18/0x68 > > ? asm_exc_invalid_op+0x1b/0x20 > > ? __might_sleep+0x52/0x6a > > ? __might_sleep+0x52/0x6a > > try_to_freeze.isra.0+0x15/0x3d [sunrpc] > > svc_recv+0x97/0x252 [sunrpc] > > ? __pfx_lockd+0x10/0x10 [lockd] > > lockd+0x6b/0xda [lockd] > > kthread+0x10d/0x115 > > ? __pfx_kthread+0x10/0x10 > > ret_from_fork+0x2a/0x43 > > ? 
__pfx_kthread+0x10/0x10 > > ret_from_fork_asm+0x1b/0x30 > > </TASK> > > > > > > > schedule(); > > > - } else { > > > - __set_current_state(TASK_RUNNING); > > > - cond_resched(); > > > - } > > > - > > > - /* We *must* be removed from the list before we can continue. > > > - * If we were woken, this is already done > > > - */ > > > - if (!svc_thread_busy(rqstp)) { > > > - spin_lock_bh(&pool->sp_lock); > > > - list_del_init(&rqstp->rq_idle); > > > - spin_unlock_bh(&pool->sp_lock); > > > + set_current_state(TASK_IDLE); > > > } > > > + __set_current_state(TASK_RUNNING); > > > } else { > > > cond_resched(); > > > } > > > @@ -870,9 +869,10 @@ void svc_recv(struct svc_rqst *rqstp) > > > struct svc_xprt *xprt = rqstp->rq_xprt; > > > > > > /* Normally we will wait up to 5 seconds for any required > > > - * cache information to be provided. > > > + * cache information to be provided. When there are no > > > + * idle threads, we reduce the wait time. > > > */ > > > - if (!list_empty(&pool->sp_idle_threads)) > > > + if (pool->sp_idle_threads.first) > > > rqstp->rq_chandle.thread_wait = 5 * HZ; > > > else > > > rqstp->rq_chandle.thread_wait = 1 * HZ; > > > -- > > > 2.40.1 > > > > > > > -- > > Chuck Lever > > > -- Chuck Lever