From: Chuck Lever <chuck.lever@xxxxxxxxxx> Count and record thread pool starvation. Administrators can take action by increasing thread count or decreasing workload. Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> --- include/linux/sunrpc/svc.h | 5 +++- include/trace/events/sunrpc.h | 49 ++++++++++++++++++++++++++++++++++------- net/sunrpc/svc.c | 9 +++++++- net/sunrpc/svc_xprt.c | 22 ++++++++++-------- 4 files changed, 64 insertions(+), 21 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index dc2d90a655e2..fbfe6ea737c8 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -22,7 +22,6 @@ #include <linux/pagevec.h> /* - * * RPC service thread pool. * * Pool of threads and temporary sockets. Generally there is only @@ -42,6 +41,7 @@ struct svc_pool { struct percpu_counter sp_sockets_queued; struct percpu_counter sp_threads_woken; struct percpu_counter sp_threads_timedout; + struct percpu_counter sp_threads_starved; #define SP_TASK_PENDING (0) /* still work to do even if no * xprt is queued. */ @@ -427,7 +427,8 @@ int svc_register(const struct svc_serv *, struct net *, const int, void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); -struct svc_rqst *svc_pool_wake_idle_thread(struct svc_pool *pool); +struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv, + struct svc_pool *pool); struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv); char * svc_print_addr(struct svc_rqst *, char *, size_t); const char * svc_proc_name(const struct svc_rqst *rqstp); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 43711753616a..9b70fc1c698a 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1994,21 +1994,21 @@ TRACE_EVENT(svc_xprt_create_err, TRACE_EVENT(svc_xprt_enqueue, TP_PROTO( const struct svc_xprt *xprt, - const struct svc_rqst *rqst + const struct svc_rqst *wakee ), - TP_ARGS(xprt, rqst), + TP_ARGS(xprt, wakee), TP_STRUCT__entry( SVC_XPRT_ENDPOINT_FIELDS(xprt) - __field(int, pid) + __field(pid_t, pid) ), TP_fast_assign( SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt); - __entry->pid = rqst? rqst->rq_task->pid : 0; + __entry->pid = wakee->rq_task->pid; ), TP_printk(SVC_XPRT_ENDPOINT_FORMAT " pid=%d", @@ -2039,6 +2039,39 @@ TRACE_EVENT(svc_xprt_dequeue, SVC_XPRT_ENDPOINT_VARARGS, __entry->wakeup) ); +#define show_svc_pool_flags(x) \ + __print_flags(x, "|", \ + { BIT(SP_TASK_PENDING), "TASK_PENDING" }, \ + { BIT(SP_CONGESTED), "CONGESTED" }) + +TRACE_EVENT(svc_pool_starved, + TP_PROTO( + const struct svc_serv *serv, + const struct svc_pool *pool + ), + + TP_ARGS(serv, pool), + + TP_STRUCT__entry( + __string(name, serv->sv_name) + __field(int, pool_id) + __field(unsigned int, nrthreads) + __field(unsigned long, flags) + ), + + TP_fast_assign( + __assign_str(name, serv->sv_name); + __entry->pool_id = pool->sp_id; + __entry->nrthreads = pool->sp_nrthreads; + __entry->flags = pool->sp_flags; + ), + + TP_printk("service=%s pool=%d flags=%s nrthreads=%u", + __get_str(name), __entry->pool_id, + show_svc_pool_flags(__entry->flags), __entry->nrthreads + ) +); + DECLARE_EVENT_CLASS(svc_xprt_event, TP_PROTO( const struct svc_xprt *xprt @@ -2109,16 +2142,16 @@ TRACE_EVENT(svc_xprt_accept, ); TRACE_EVENT(svc_wake_up, - TP_PROTO(int pid), + TP_PROTO(const struct svc_rqst *wakee), - TP_ARGS(pid), + TP_ARGS(wakee), TP_STRUCT__entry( - __field(int, pid) + __field(pid_t, pid) ), TP_fast_assign( - __entry->pid = pid; + __entry->pid = wakee->rq_task->pid; ), TP_printk("pid=%d", __entry->pid) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 05ee92b5fa1e..b79b8b41905d 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -516,6 +516,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL); + percpu_counter_init(&pool->sp_threads_starved, 0, GFP_KERNEL); } return serv; @@ -591,6 +592,7 @@ svc_destroy(struct kref *ref) percpu_counter_destroy(&pool->sp_sockets_queued); percpu_counter_destroy(&pool->sp_threads_woken); percpu_counter_destroy(&pool->sp_threads_timedout); + percpu_counter_destroy(&pool->sp_threads_starved); } kfree(serv->sv_pools); kfree(serv); @@ -691,6 +693,7 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) /** * svc_pool_wake_idle_thread - Awaken an idle thread in @pool + * @serv: RPC service * @pool: service thread pool * * Returns an idle service thread (now marked BUSY), or NULL @@ -698,7 +701,8 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) * thread and marking it BUSY is atomic with respect to other * calls to svc_pool_wake_idle_thread(). */ -struct svc_rqst *svc_pool_wake_idle_thread(struct svc_pool *pool) +struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv, + struct svc_pool *pool) { struct svc_rqst *rqstp; @@ -714,6 +718,9 @@ struct svc_rqst *svc_pool_wake_idle_thread(struct svc_pool *pool) return rqstp; } rcu_read_unlock(); + + trace_svc_pool_starved(serv, pool); + percpu_counter_inc(&pool->sp_threads_starved); return NULL; } diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 89302bf09b77..a1ed6fb69793 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -455,7 +455,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) */ void svc_xprt_enqueue(struct svc_xprt *xprt) { - struct svc_rqst *rqstp; + struct svc_rqst *rqstp; struct svc_pool *pool; if (!svc_xprt_ready(xprt)) @@ -476,9 +476,11 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); spin_unlock_bh(&pool->sp_lock); - rqstp = svc_pool_wake_idle_thread(pool); - if (!rqstp) + rqstp = svc_pool_wake_idle_thread(xprt->xpt_server, pool); + if (!rqstp) { set_bit(SP_CONGESTED, &pool->sp_flags); + return; + } trace_svc_xprt_enqueue(xprt, rqstp); } @@ -584,17 +586,16 @@ static void svc_xprt_release(struct svc_rqst *rqstp) void svc_wake_up(struct svc_serv *serv) { struct svc_pool *pool = &serv->sv_pools[0]; - struct svc_rqst *rqstp; + struct svc_rqst *rqstp; - rqstp = svc_pool_wake_idle_thread(pool); + rqstp = svc_pool_wake_idle_thread(serv, pool); if (!rqstp) { set_bit(SP_TASK_PENDING, &pool->sp_flags); smp_wmb(); - trace_svc_wake_up(0); return; } - trace_svc_wake_up(rqstp->rq_task->pid); + trace_svc_wake_up(rqstp); } EXPORT_SYMBOL_GPL(svc_wake_up); @@ -1436,16 +1437,17 @@ static int svc_pool_stats_show(struct seq_file *m, void *p) struct svc_pool *pool = p; if (p == SEQ_START_TOKEN) { - seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken threads-timedout\n"); + seq_puts(m, "# pool packets-arrived xprts-enqueued threads-woken threads-timedout starved\n"); return 0; } - seq_printf(m, "%u %llu %llu %llu %llu\n", + seq_printf(m, "%u %llu %llu %llu %llu %llu\n", pool->sp_id, percpu_counter_sum_positive(&pool->sp_sockets_queued), percpu_counter_sum_positive(&pool->sp_sockets_queued), percpu_counter_sum_positive(&pool->sp_threads_woken), - percpu_counter_sum_positive(&pool->sp_threads_timedout)); + percpu_counter_sum_positive(&pool->sp_threads_timedout), + percpu_counter_sum_positive(&pool->sp_threads_starved)); return 0; }