On Wed, Aug 02, 2023 at 05:34:42PM +1000, NeilBrown wrote: > Rather than searching a list of threads to find an idle one, having a > list of idle threads allows an idle thread to be found immediately. > > This adds some spin_lock calls which is not ideal, but as the hold-time > is tiny it is still faster than searching a list. Keep in mind that b1691bc03d4e ("sunrpc: convert to lockless lookup of queued server threads") did the opposite because that very spin_lock was highly contended. I am skeptical of the above claim without lock_stat data... but that's sort of moot as this is a temporary situation, as you point out next. > A future patch will > remove them using llist.h. This involves some subtlety and so is left > to a separate patch. Since I haven't seen that patch yet, I'm reserving judgement about whether and how these two changes might be merged. > This removes the need for the RQ_BUSY flag. The rqst is "busy" > precisely when it is not on the "idle" list. I've been having some trouble with this one. The server system deadlocks hard as soon as the NFS server starts. I tracked it down this morning: this patch never initialized the sp_idle_threads list_head. I will apply this patch (with a one-line fix) and the patch that removes SP_CONGESTED once I hear from the client folks on the "integrate backchannel" patch. 
> Signed-off-by: NeilBrown <neilb@xxxxxxx> > --- > include/linux/sunrpc/svc.h | 25 ++++++++++++++++++++++++- > include/trace/events/sunrpc.h | 1 - > net/sunrpc/svc.c | 13 ++++++++----- > net/sunrpc/svc_xprt.c | 15 +++++++++++---- > 4 files changed, 43 insertions(+), 11 deletions(-) > > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h > index 1ac6f74781aa..8b93af92dd53 100644 > --- a/include/linux/sunrpc/svc.h > +++ b/include/linux/sunrpc/svc.h > @@ -37,6 +37,7 @@ struct svc_pool { > struct list_head sp_sockets; /* pending sockets */ > unsigned int sp_nrthreads; /* # of threads in pool */ > struct list_head sp_all_threads; /* all server threads */ > + struct list_head sp_idle_threads; /* idle server threads */ > > /* statistics on pool operation */ > struct percpu_counter sp_messages_arrived; > @@ -186,6 +187,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); > */ > struct svc_rqst { > struct list_head rq_all; /* all threads list */ > + struct list_head rq_idle; /* On the idle list */ > struct rcu_head rq_rcu_head; /* for RCU deferred kfree */ > struct svc_xprt * rq_xprt; /* transport ptr */ > > @@ -262,10 +264,31 @@ enum { > RQ_SPLICE_OK, /* turned off in gss privacy to prevent > * encrypting page cache pages */ > RQ_VICTIM, /* Have agreed to shut down */ > - RQ_BUSY, /* request is busy */ > RQ_DATA, /* request has data */ > }; > > +/** > + * svc_thread_set_busy - mark a thread as busy > + * @rqstp: the thread which is now busy > + * > + * If rq_idle is "empty", the thread must be busy. > + */ > +static inline void svc_thread_set_busy(struct svc_rqst *rqstp) > +{ > + INIT_LIST_HEAD(&rqstp->rq_idle); > +} > + > +/** > + * svc_thread_busy - check if a thread as busy > + * @rqstp: the thread which might be busy > + * > + * If rq_idle is "empty", the thread must be busy. > + */ > +static inline bool svc_thread_busy(struct svc_rqst *rqstp) > +{ > + return list_empty(&rqstp->rq_idle); > +} > + > #define SVC_NET(rqst) (rqst->rq_xprt ? 
rqst->rq_xprt->xpt_net : rqst->rq_bc_net) > > /* > diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h > index 6beb38c1dcb5..337c90787fb1 100644 > --- a/include/trace/events/sunrpc.h > +++ b/include/trace/events/sunrpc.h > @@ -1677,7 +1677,6 @@ DEFINE_SVCXDRBUF_EVENT(sendto); > svc_rqst_flag(DROPME) \ > svc_rqst_flag(SPLICE_OK) \ > svc_rqst_flag(VICTIM) \ > - svc_rqst_flag(BUSY) \ > svc_rqst_flag_end(DATA) > > #undef svc_rqst_flag > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c > index 1233d72714b9..dce433dea1bd 100644 > --- a/net/sunrpc/svc.c > +++ b/net/sunrpc/svc.c > @@ -641,7 +641,7 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) > > folio_batch_init(&rqstp->rq_fbatch); > > - __set_bit(RQ_BUSY, &rqstp->rq_flags); > + svc_thread_set_busy(rqstp); > rqstp->rq_server = serv; > rqstp->rq_pool = pool; > > @@ -702,10 +702,13 @@ void svc_pool_wake_idle_thread(struct svc_pool *pool) > struct svc_rqst *rqstp; > > rcu_read_lock(); > - list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { > - if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) > - continue; > - > + spin_lock_bh(&pool->sp_lock); > + rqstp = list_first_entry_or_null(&pool->sp_idle_threads, > + struct svc_rqst, rq_idle); > + if (rqstp) > + list_del_init(&rqstp->rq_idle); > + spin_unlock_bh(&pool->sp_lock); > + if (rqstp) { > WRITE_ONCE(rqstp->rq_qtime, ktime_get()); > wake_up_process(rqstp->rq_task); > rcu_read_unlock(); > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c > index 0a300ae6a7ed..e44efcc21b63 100644 > --- a/net/sunrpc/svc_xprt.c > +++ b/net/sunrpc/svc_xprt.c > @@ -737,8 +737,9 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp) > set_current_state(TASK_IDLE); > smp_mb__before_atomic(); > clear_bit(SP_CONGESTED, &pool->sp_flags); > - clear_bit(RQ_BUSY, &rqstp->rq_flags); > - smp_mb__after_atomic(); > + spin_lock_bh(&pool->sp_lock); > + list_add(&rqstp->rq_idle, &pool->sp_idle_threads); > + 
spin_unlock_bh(&pool->sp_lock); > > /* Need to check should_sleep() again after > * setting task state in case a wakeup happened > @@ -751,8 +752,14 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp) > cond_resched(); > } > > - set_bit(RQ_BUSY, &rqstp->rq_flags); > - smp_mb__after_atomic(); > + /* We *must* be removed from the list before we can continue. > + * If we were woken, this is already done > + */ > + if (!svc_thread_busy(rqstp)) { > + spin_lock_bh(&pool->sp_lock); > + list_del_init(&rqstp->rq_idle); > + spin_unlock_bh(&pool->sp_lock); > + } > } else > cond_resched(); > try_to_freeze(); > -- > 2.40.1 > -- Chuck Lever