On Wed, May 16, 2018 at 10:35 AM Linus Torvalds
<torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
>
> Anyway, my gut feel is that we should try making the /proc/<pid>/status
> field bigger (not necessarily TASK_COMM_LEN - just add a similar
>
>         if (tsk->flags & PF_WQ_WORKER) {
>
> to the task_name() code in fs/proc/array.c), and see if that works better.

Ok, so 'systemd' uses /proc/<pid>/comm, but procps-ng only accesses
stat, status, and cmdline for each process.

Some testing shows that 'ps' on my system gets the name from
/proc/<pid>/status, but annoyingly truncates it to 16 bytes
(TASK_COMM_LEN) even though the name line can obviously be much longer.

So with the attached patch I have:

  [torvalds@i7 linux]$ cat /proc/55/status | grep Name
  Name:   kworker/7:0H kblockd

  [torvalds@i7 linux]$ cat /proc/55/comm
  kworker/7:0H kblockd

but "ps ax" annoyingly just shows

  [torvalds@i7 procps]$ ps ax | grep -w 55
     55 ?        I<     0:00 [kworker/7:0H kb]

because it "knows" that the comm field can be only 16 bytes.

So we can add the extended kworker information to the /proc files, but
"ps" won't show it very well.

Adding Craig Small to the cc. A trivial one-liner to procps-ng fixes
things: changing the size of "cmd[]" in struct proc_t in
proc/readproc.h from 16 to 32 makes it just work:

  [torvalds@i7 procps]$ ./ps/pscommand ax | grep -w 55
     55 ?        I<     0:00 [kworker/7:0H kblockd]

Hmm. Tejun, this patch is obviously just your original patch, modified
to use a seqfile (which really simplifies things anyway), and then
changed to use /proc/comm and /proc/status instead of /proc/cmdline.

Comments? We could do the kernel change in the hope that some day
procps is ok with bigger names, even if it doesn't show them today.

Craig? Is there some "proper channel" I should go through to ask for a
bigger possible command line?

                 Linus
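For reference, the procps-ng one-liner mentioned above would look
something like this (a sketch only; the exact line numbers and the
field's comment in proc/readproc.h may differ between procps-ng
versions):

  --- a/proc/readproc.h
  +++ b/proc/readproc.h
  @@ @@ typedef struct proc_t {
  -    char cmd[16];	/* basename of executable file in call to exec(2) */
  +    char cmd[32];	/* basename of executable file in call to exec(2) */

The attached kernel patch follows.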
 fs/proc/array.c             |   2 +
 fs/proc/base.c              |  11 +++--
 include/linux/workqueue.h   |   1 +
 kernel/workqueue.c          | 109 +++++++++++++++++++++++++++-----------------
 kernel/workqueue_internal.h |   1 -
 5 files changed, 77 insertions(+), 47 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index ae2c807fd719..14839e02ef49 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -109,6 +109,8 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
 	size = seq_get_buf(m, &buf);
 	ret = string_escape_str(tcomm, buf, size, ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
 	seq_commit(m, ret < size ? ret : -1);
+	if (p->flags & PF_WQ_WORKER)
+		wq_worker_cmdline(m, p);
 	seq_putc(m, '\n');
 }

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1b2ede6abcdf..5845a6a5267b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1560,17 +1560,20 @@ static int comm_show(struct seq_file *m, void *v)
 {
 	struct inode *inode = m->private;
 	struct task_struct *p;
+	char comm[TASK_COMM_LEN];
 
 	p = get_proc_task(inode);
 	if (!p)
 		return -ESRCH;
 
-	task_lock(p);
-	seq_printf(m, "%s\n", p->comm);
-	task_unlock(p);
+	get_task_comm(comm, p);
 
-	put_task_struct(p);
+	seq_puts(m, comm);
+	if (p->flags & PF_WQ_WORKER)
+		wq_worker_cmdline(m, p);
+	seq_putc(m, '\n');
 
+	put_task_struct(p);
 	return 0;
 }

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 39a0e215022a..39a10a753876 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -494,6 +494,7 @@ extern unsigned int work_busy(struct work_struct *work);
 extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
 extern void print_worker_info(const char *log_lvl, struct task_struct *task);
 extern void show_workqueue_state(void);
+extern void wq_worker_cmdline(struct seq_file *m, struct task_struct *task);
 
 /**
  * queue_work - queue work on a workqueue

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ca7959be8aaa..b75921d54f32 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -66,7 +66,7 @@ enum {
	 * be executing on any CPU.  The pool behaves as an unbound one.
	 *
	 * Note that DISASSOCIATED should be flipped only while holding
-	 * attach_mutex to avoid changing binding state while
+	 * wq_pool_attach_mutex to avoid changing binding state while
	 * worker_attach_to_pool() is in progress.
	 */
	POOL_MANAGER_ACTIVE	= 1 << 0,	/* being managed */
@@ -123,7 +123,7 @@ enum {
 *    cpu or grabbing pool->lock is enough for read access.  If
 *    POOL_DISASSOCIATED is set, it's identical to L.
 *
- * A: pool->attach_mutex protected.
+ * A: wq_pool_attach_mutex protected.
 *
 * PL: wq_pool_mutex protected.
 *
@@ -166,7 +166,6 @@ struct worker_pool {
						/* L: hash of busy workers */
 
	struct worker		*manager;	/* L: purely informational */
-	struct mutex		attach_mutex;	/* attach/detach exclusion */
	struct list_head	workers;	/* A: attached workers */
	struct completion	*detach_completion; /* all workers detached */
@@ -297,6 +296,7 @@ static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */
 static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
 
 static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
+static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
 static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
 static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
@@ -399,14 +399,14 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
 * @worker: iteration cursor
 * @pool: worker_pool to iterate workers of
 *
- * This must be called with @pool->attach_mutex.
+ * This must be called with wq_pool_attach_mutex.
 *
 * The if/else clause exists only for the lockdep assertion and can be
 * ignored.
 */
 #define for_each_pool_worker(worker, pool)				\
	list_for_each_entry((worker), &(pool)->workers, node)		\
-		if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \
+		if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \
		else
 
 /**
@@ -1724,7 +1724,7 @@ static struct worker *alloc_worker(int node)
 static void worker_attach_to_pool(struct worker *worker,
				   struct worker_pool *pool)
 {
-	mutex_lock(&pool->attach_mutex);
+	mutex_lock(&wq_pool_attach_mutex);
 
	/*
	 * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
@@ -1733,37 +1733,40 @@ static void worker_attach_to_pool(struct worker *worker,
	set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
 
	/*
-	 * The pool->attach_mutex ensures %POOL_DISASSOCIATED remains
-	 * stable across this function.  See the comments above the
-	 * flag definition for details.
+	 * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains
+	 * stable across this function.  See the comments above the flag
+	 * definition for details.
	 */
	if (pool->flags & POOL_DISASSOCIATED)
		worker->flags |= WORKER_UNBOUND;
 
	list_add_tail(&worker->node, &pool->workers);
+	worker->pool = pool;
 
-	mutex_unlock(&pool->attach_mutex);
+	mutex_unlock(&wq_pool_attach_mutex);
 }
 
 /**
  * worker_detach_from_pool() - detach a worker from its pool
  * @worker: worker which is attached to its pool
- * @pool: the pool @worker is attached to
  *
  * Undo the attaching which had been done in worker_attach_to_pool().  The
  * caller worker shouldn't access to the pool after detached except it has
  * other reference to the pool.
  */
-static void worker_detach_from_pool(struct worker *worker,
-				    struct worker_pool *pool)
+static void worker_detach_from_pool(struct worker *worker)
 {
+	struct worker_pool *pool = worker->pool;
	struct completion *detach_completion = NULL;
 
-	mutex_lock(&pool->attach_mutex);
+	mutex_lock(&wq_pool_attach_mutex);
+
	list_del(&worker->node);
+	worker->pool = NULL;
+
	if (list_empty(&pool->workers))
		detach_completion = pool->detach_completion;
-	mutex_unlock(&pool->attach_mutex);
+	mutex_unlock(&wq_pool_attach_mutex);
 
	/* clear leftover flags without pool->lock after it is detached */
	worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
@@ -1799,7 +1802,6 @@ static struct worker *create_worker(struct worker_pool *pool)
	if (!worker)
		goto fail;
 
-	worker->pool = pool;
	worker->id = id;
 
	if (pool->cpu >= 0)
@@ -2086,6 +2088,12 @@ __acquires(&pool->lock)
	worker->current_pwq = pwq;
	work_color = get_work_color(work);
 
+	/*
+	 * Record wq name for cmdline and debug reporting, may get
+	 * overridden through set_worker_desc().
+	 */
+	strlcpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);
+
	list_del_init(&work->entry);
 
	/*
@@ -2181,7 +2189,6 @@ __acquires(&pool->lock)
	worker->current_work = NULL;
	worker->current_func = NULL;
	worker->current_pwq = NULL;
-	worker->desc_valid = false;
	pwq_dec_nr_in_flight(pwq, work_color);
 }
 
@@ -2236,7 +2243,7 @@ static int worker_thread(void *__worker)
		set_task_comm(worker->task, "kworker/dying");
		ida_simple_remove(&pool->worker_ida, worker->id);
-		worker_detach_from_pool(worker, pool);
+		worker_detach_from_pool(worker);
		kfree(worker);
		return 0;
	}
@@ -2367,7 +2374,6 @@ static int rescuer_thread(void *__rescuer)
		worker_attach_to_pool(rescuer, pool);
 
		spin_lock_irq(&pool->lock);
-		rescuer->pool = pool;
 
		/*
		 * Slurp in all works issued via this workqueue and
@@ -2417,10 +2423,9 @@ static int rescuer_thread(void *__rescuer)
		if (need_more_worker(pool))
			wake_up_worker(pool);
 
-		rescuer->pool = NULL;
		spin_unlock_irq(&pool->lock);
 
-		worker_detach_from_pool(rescuer, pool);
+		worker_detach_from_pool(rescuer);
 
		spin_lock_irq(&wq_mayday_lock);
	}
@@ -3271,7 +3276,6 @@ static int init_worker_pool(struct worker_pool *pool)
 
	timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);
 
-	mutex_init(&pool->attach_mutex);
	INIT_LIST_HEAD(&pool->workers);
 
	ida_init(&pool->worker_ida);
@@ -3354,10 +3358,10 @@ static void put_unbound_pool(struct worker_pool *pool)
	WARN_ON(pool->nr_workers || pool->nr_idle);
	spin_unlock_irq(&pool->lock);
 
-	mutex_lock(&pool->attach_mutex);
+	mutex_lock(&wq_pool_attach_mutex);
	if (!list_empty(&pool->workers))
		pool->detach_completion = &detach_completion;
-	mutex_unlock(&pool->attach_mutex);
+	mutex_unlock(&wq_pool_attach_mutex);
 
	if (pool->detach_completion)
		wait_for_completion(pool->detach_completion);
@@ -4347,7 +4351,6 @@ void set_worker_desc(const char *fmt, ...)
		va_start(args, fmt);
		vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
		va_end(args);
-		worker->desc_valid = true;
	}
 }
 
@@ -4369,9 +4372,9 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)
	work_func_t *fn = NULL;
	char name[WQ_NAME_LEN] = { };
	char desc[WORKER_DESC_LEN] = { };
+	struct work_struct *work = NULL;
	struct pool_workqueue *pwq = NULL;
	struct workqueue_struct *wq = NULL;
-	bool desc_valid = false;
	struct worker *worker;
 
	if (!(task->flags & PF_WQ_WORKER))
@@ -4383,6 +4386,11 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)
	 */
	worker = kthread_probe_data(task);
 
+	/* skip idle workers */
+	probe_kernel_read(&work, &worker->current_work, sizeof(work));
+	if (!work)
+		return;
+
	/*
	 * Carefully copy the associated workqueue's workfn and name.  Keep
	 * the original last '\0' in case the original contains garbage.
@@ -4393,16 +4401,9 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)
	probe_kernel_read(name, wq->name, sizeof(name) - 1);
 
	/* copy worker description */
-	probe_kernel_read(&desc_valid, &worker->desc_valid, sizeof(desc_valid));
-	if (desc_valid)
-		probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
-
-	if (fn || name[0] || desc[0]) {
-		printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
-		if (desc[0])
-			pr_cont(" (%s)", desc);
-		pr_cont("\n");
-	}
+	probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
+
+	printk("%sWorkqueue: %s %pf (%s)\n", log_lvl, name, fn, desc);
 }
 
 static void pr_cont_pool_info(struct worker_pool *pool)
@@ -4579,6 +4580,30 @@ void show_workqueue_state(void)
	rcu_read_unlock_sched();
 }
 
+void wq_worker_cmdline(struct seq_file *m, struct task_struct *task)
+{
+	struct worker *worker;
+	struct worker_pool *pool;
+
+	mutex_lock(&wq_pool_attach_mutex);
+
+	worker = kthread_data(task);
+	pool = worker->pool;
+
+	if (pool) {
+		spin_lock_irq(&pool->lock);
+		if (worker->desc[0] != '\0') {
+			const char *fmt = " %s";
+			if (worker->current_work)
+				fmt = " <%s>";
+			seq_printf(m, fmt, worker->desc);
+		}
+		spin_unlock_irq(&pool->lock);
+	}
+
+	mutex_unlock(&wq_pool_attach_mutex);
+}
+
 /*
  * CPU hotplug.
  *
@@ -4600,7 +4625,7 @@ static void unbind_workers(int cpu)
	struct worker *worker;
 
	for_each_cpu_worker_pool(pool, cpu) {
-		mutex_lock(&pool->attach_mutex);
+		mutex_lock(&wq_pool_attach_mutex);
		spin_lock_irq(&pool->lock);
 
		/*
@@ -4616,7 +4641,7 @@ static void unbind_workers(int cpu)
		pool->flags |= POOL_DISASSOCIATED;
 
		spin_unlock_irq(&pool->lock);
-		mutex_unlock(&pool->attach_mutex);
+		mutex_unlock(&wq_pool_attach_mutex);
 
		/*
		 * Call schedule() so that we cross rq->lock and thus can
@@ -4657,7 +4682,7 @@ static void rebind_workers(struct worker_pool *pool)
 {
	struct worker *worker;
 
-	lockdep_assert_held(&pool->attach_mutex);
+	lockdep_assert_held(&wq_pool_attach_mutex);
 
	/*
	 * Restore CPU affinity of all workers.  As all idle workers should
@@ -4727,7 +4752,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
	static cpumask_t cpumask;
	struct worker *worker;
 
-	lockdep_assert_held(&pool->attach_mutex);
+	lockdep_assert_held(&wq_pool_attach_mutex);
 
	/* is @cpu allowed for @pool? */
	if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
		return;
@@ -4762,14 +4787,14 @@ int workqueue_online_cpu(unsigned int cpu)
	mutex_lock(&wq_pool_mutex);
 
	for_each_pool(pool, pi) {
-		mutex_lock(&pool->attach_mutex);
+		mutex_lock(&wq_pool_attach_mutex);
 
		if (pool->cpu == cpu)
			rebind_workers(pool);
		else if (pool->cpu < 0)
			restore_unbound_workers_cpumask(pool, cpu);
 
-		mutex_unlock(&pool->attach_mutex);
+		mutex_unlock(&wq_pool_attach_mutex);
	}
 
	/* update NUMA affinity of unbound workqueues */
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index d390d1be3748..feffcc2aa9f7 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -31,7 +31,6 @@ struct worker {
	struct work_struct	*current_work;	/* L: work being processed */
	work_func_t		current_func;	/* L: current_work's fn */
	struct pool_workqueue	*current_pwq;	/* L: current_work's pwq */
-	bool			desc_valid;	/* ->desc is valid */
	struct list_head	scheduled;	/* L: scheduled works */
 
	/* 64 bytes boundary on 64bit, 32 on 32bit */
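For completeness, here is a hypothetical standalone userspace example
(not part of the patch) that just reads /proc/<pid>/comm; with the
change above, a kworker's name can come back longer than the 16-byte
TASK_COMM_LEN that ps still assumes, e.g. the "kworker/7:0H kblockd"
from the session quoted earlier:

#include <stdio.h>

int main(int argc, char **argv)
{
	char path[64];
	char name[64];
	FILE *f;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <pid>\n", argv[0]);
		return 1;
	}

	/* read the (possibly extended) command name of the given pid */
	snprintf(path, sizeof(path), "/proc/%s/comm", argv[1]);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}

	/* e.g. "kworker/7:0H kblockd" once the kernel patch is applied */
	if (fgets(name, sizeof(name), f))
		fputs(name, stdout);

	fclose(f);
	return 0;
}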