Re: [RFC] better visibility into kworkers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, May 16, 2018 at 10:35 AM Linus Torvalds <
torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:

> Anyway, my gut feel is that we should try making the /proc/<pid>/status
> field bigger (not necessarily TASK_COMM_LEN - just add a similar

>        if (tsk->flags & PF_WQ_WORKER) {

> to the task_name() code in fs/proc/array.c), and see if that works better.

Ok, so 'systemd' uses /proc/<pid>/comm, but procps-ng only accesses stat,
status,
and cmdline for each process.

Some testing shows that 'ps' on my system gets the name from
/proc/<pid>/status, but annoyingly truncates it to 16 bytes (TASK_COMM_LEN)
even though the name line can obviously be much longer.

So with the attached patch I have:

     [torvalds@i7 linux]$ cat /proc/55/status | grep Name
     Name: kworker/7:0H kblockd
     [torvalds@i7 linux]$ cat /proc/55/comm
     kworker/7:0H kblockd

but "ps a" annoyingly just shows

     [torvalds@i7 procps]$ ps ax | grep -w 55
        55 ?        I<     0:00 [kworker/7:0H kb]

because it "knows" that the comm field can be only 16 bytes.

So we can add the extended kworker information to the /proc files, but "ps"
won't show them very well.

Adding Craig Small to the cc. A trivial one-liner to procps-ng fixes
things: changing the size of "cmd[]" in struct proc_t in proc/readproc.h
from 16 to 32 makes it just work:

     [torvalds@i7 procps]$ ./ps/pscommand ax | grep -w 55
        55 ?        I<     0:00 [kworker/7:0H kblockd]

Hmm.

Tejun, this patch is obviously just your original patch, modified to use a
seqfile (which really simplifies things anyway), and then changed to use
/proc/comm and /proc/status instead of /proc/cmdline.

Comments? We could do the kernel change with the hope that some day procps
is ok with bigger names, even if it doesn't show them today.

Craig? Is there some "proper channel" I should go through to ask for a
bigger possible command line?

                         Linus
 fs/proc/array.c             |   2 +
 fs/proc/base.c              |  11 +++--
 include/linux/workqueue.h   |   1 +
 kernel/workqueue.c          | 109 +++++++++++++++++++++++++++-----------------
 kernel/workqueue_internal.h |   1 -
 5 files changed, 77 insertions(+), 47 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index ae2c807fd719..14839e02ef49 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -109,6 +109,8 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
 	size = seq_get_buf(m, &buf);
 	ret = string_escape_str(tcomm, buf, size, ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
 	seq_commit(m, ret < size ? ret : -1);
+	if (p->flags & PF_WQ_WORKER)
+		wq_worker_cmdline(m, p);
 
 	seq_putc(m, '\n');
 }
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1b2ede6abcdf..5845a6a5267b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1560,17 +1560,20 @@ static int comm_show(struct seq_file *m, void *v)
 {
 	struct inode *inode = m->private;
 	struct task_struct *p;
+	char comm[TASK_COMM_LEN];
 
 	p = get_proc_task(inode);
 	if (!p)
 		return -ESRCH;
 
-	task_lock(p);
-	seq_printf(m, "%s\n", p->comm);
-	task_unlock(p);
+	get_task_comm(comm, p);
 
-	put_task_struct(p);
+	seq_puts(m, comm);
+	if (p->flags & PF_WQ_WORKER)
+		wq_worker_cmdline(m, p);
 
+	seq_putc(m, '\n');
+	put_task_struct(p);
 	return 0;
 }
 
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 39a0e215022a..39a10a753876 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -494,6 +494,7 @@ extern unsigned int work_busy(struct work_struct *work);
 extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
 extern void print_worker_info(const char *log_lvl, struct task_struct *task);
 extern void show_workqueue_state(void);
+extern void wq_worker_cmdline(struct seq_file *m, struct task_struct *task);
 
 /**
  * queue_work - queue work on a workqueue
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ca7959be8aaa..b75921d54f32 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -66,7 +66,7 @@ enum {
 	 * be executing on any CPU.  The pool behaves as an unbound one.
 	 *
 	 * Note that DISASSOCIATED should be flipped only while holding
-	 * attach_mutex to avoid changing binding state while
+	 * wq_pool_attach_mutex to avoid changing binding state while
 	 * worker_attach_to_pool() is in progress.
 	 */
 	POOL_MANAGER_ACTIVE	= 1 << 0,	/* being managed */
@@ -123,7 +123,7 @@ enum {
  *    cpu or grabbing pool->lock is enough for read access.  If
  *    POOL_DISASSOCIATED is set, it's identical to L.
  *
- * A: pool->attach_mutex protected.
+ * A: wq_pool_attach_mutex protected.
  *
  * PL: wq_pool_mutex protected.
  *
@@ -166,7 +166,6 @@ struct worker_pool {
 						/* L: hash of busy workers */
 
 	struct worker		*manager;	/* L: purely informational */
-	struct mutex		attach_mutex;	/* attach/detach exclusion */
 	struct list_head	workers;	/* A: attached workers */
 	struct completion	*detach_completion; /* all workers detached */
 
@@ -297,6 +296,7 @@ static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */
 static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
 
 static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
+static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
 static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
 static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
 
@@ -399,14 +399,14 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  * @worker: iteration cursor
  * @pool: worker_pool to iterate workers of
  *
- * This must be called with @pool->attach_mutex.
+ * This must be called with wq_pool_attach_mutex.
  *
  * The if/else clause exists only for the lockdep assertion and can be
  * ignored.
  */
 #define for_each_pool_worker(worker, pool)				\
 	list_for_each_entry((worker), &(pool)->workers, node)		\
-		if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \
+		if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \
 		else
 
 /**
@@ -1724,7 +1724,7 @@ static struct worker *alloc_worker(int node)
 static void worker_attach_to_pool(struct worker *worker,
 				   struct worker_pool *pool)
 {
-	mutex_lock(&pool->attach_mutex);
+	mutex_lock(&wq_pool_attach_mutex);
 
 	/*
 	 * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
@@ -1733,37 +1733,40 @@ static void worker_attach_to_pool(struct worker *worker,
 	set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
 
 	/*
-	 * The pool->attach_mutex ensures %POOL_DISASSOCIATED remains
-	 * stable across this function.  See the comments above the
-	 * flag definition for details.
+	 * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains
+	 * stable across this function.  See the comments above the flag
+	 * definition for details.
 	 */
 	if (pool->flags & POOL_DISASSOCIATED)
 		worker->flags |= WORKER_UNBOUND;
 
 	list_add_tail(&worker->node, &pool->workers);
+	worker->pool = pool;
 
-	mutex_unlock(&pool->attach_mutex);
+	mutex_unlock(&wq_pool_attach_mutex);
 }
 
 /**
  * worker_detach_from_pool() - detach a worker from its pool
  * @worker: worker which is attached to its pool
- * @pool: the pool @worker is attached to
  *
  * Undo the attaching which had been done in worker_attach_to_pool().  The
  * caller worker shouldn't access to the pool after detached except it has
  * other reference to the pool.
  */
-static void worker_detach_from_pool(struct worker *worker,
-				    struct worker_pool *pool)
+static void worker_detach_from_pool(struct worker *worker)
 {
+	struct worker_pool *pool = worker->pool;
 	struct completion *detach_completion = NULL;
 
-	mutex_lock(&pool->attach_mutex);
+	mutex_lock(&wq_pool_attach_mutex);
+
 	list_del(&worker->node);
+	worker->pool = NULL;
+
 	if (list_empty(&pool->workers))
 		detach_completion = pool->detach_completion;
-	mutex_unlock(&pool->attach_mutex);
+	mutex_unlock(&wq_pool_attach_mutex);
 
 	/* clear leftover flags without pool->lock after it is detached */
 	worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
@@ -1799,7 +1802,6 @@ static struct worker *create_worker(struct worker_pool *pool)
 	if (!worker)
 		goto fail;
 
-	worker->pool = pool;
 	worker->id = id;
 
 	if (pool->cpu >= 0)
@@ -2086,6 +2088,12 @@ __acquires(&pool->lock)
 	worker->current_pwq = pwq;
 	work_color = get_work_color(work);
 
+	/*
+	 * Record wq name for cmdline and debug reporting, may get
+	 * overridden through set_worker_desc().
+	 */
+	strlcpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);
+
 	list_del_init(&work->entry);
 
 	/*
@@ -2181,7 +2189,6 @@ __acquires(&pool->lock)
 	worker->current_work = NULL;
 	worker->current_func = NULL;
 	worker->current_pwq = NULL;
-	worker->desc_valid = false;
 	pwq_dec_nr_in_flight(pwq, work_color);
 }
 
@@ -2236,7 +2243,7 @@ static int worker_thread(void *__worker)
 
 		set_task_comm(worker->task, "kworker/dying");
 		ida_simple_remove(&pool->worker_ida, worker->id);
-		worker_detach_from_pool(worker, pool);
+		worker_detach_from_pool(worker);
 		kfree(worker);
 		return 0;
 	}
@@ -2367,7 +2374,6 @@ static int rescuer_thread(void *__rescuer)
 		worker_attach_to_pool(rescuer, pool);
 
 		spin_lock_irq(&pool->lock);
-		rescuer->pool = pool;
 
 		/*
 		 * Slurp in all works issued via this workqueue and
@@ -2417,10 +2423,9 @@ static int rescuer_thread(void *__rescuer)
 		if (need_more_worker(pool))
 			wake_up_worker(pool);
 
-		rescuer->pool = NULL;
 		spin_unlock_irq(&pool->lock);
 
-		worker_detach_from_pool(rescuer, pool);
+		worker_detach_from_pool(rescuer);
 
 		spin_lock_irq(&wq_mayday_lock);
 	}
@@ -3271,7 +3276,6 @@ static int init_worker_pool(struct worker_pool *pool)
 
 	timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);
 
-	mutex_init(&pool->attach_mutex);
 	INIT_LIST_HEAD(&pool->workers);
 
 	ida_init(&pool->worker_ida);
@@ -3354,10 +3358,10 @@ static void put_unbound_pool(struct worker_pool *pool)
 	WARN_ON(pool->nr_workers || pool->nr_idle);
 	spin_unlock_irq(&pool->lock);
 
-	mutex_lock(&pool->attach_mutex);
+	mutex_lock(&wq_pool_attach_mutex);
 	if (!list_empty(&pool->workers))
 		pool->detach_completion = &detach_completion;
-	mutex_unlock(&pool->attach_mutex);
+	mutex_unlock(&wq_pool_attach_mutex);
 
 	if (pool->detach_completion)
 		wait_for_completion(pool->detach_completion);
@@ -4347,7 +4351,6 @@ void set_worker_desc(const char *fmt, ...)
 		va_start(args, fmt);
 		vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
 		va_end(args);
-		worker->desc_valid = true;
 	}
 }
 
@@ -4369,9 +4372,9 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)
 	work_func_t *fn = NULL;
 	char name[WQ_NAME_LEN] = { };
 	char desc[WORKER_DESC_LEN] = { };
+	struct work_struct *work = NULL;
 	struct pool_workqueue *pwq = NULL;
 	struct workqueue_struct *wq = NULL;
-	bool desc_valid = false;
 	struct worker *worker;
 
 	if (!(task->flags & PF_WQ_WORKER))
@@ -4383,6 +4386,11 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)
 	 */
 	worker = kthread_probe_data(task);
 
+	/* skip idle workers */
+	probe_kernel_read(&work, &worker->current_work, sizeof(work));
+	if (!work)
+		return;
+
 	/*
 	 * Carefully copy the associated workqueue's workfn and name.  Keep
 	 * the original last '\0' in case the original contains garbage.
@@ -4393,16 +4401,9 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)
 	probe_kernel_read(name, wq->name, sizeof(name) - 1);
 
 	/* copy worker description */
-	probe_kernel_read(&desc_valid, &worker->desc_valid, sizeof(desc_valid));
-	if (desc_valid)
-		probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
-
-	if (fn || name[0] || desc[0]) {
-		printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
-		if (desc[0])
-			pr_cont(" (%s)", desc);
-		pr_cont("\n");
-	}
+	probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
+
+	printk("%sWorkqueue: %s %pf (%s)\n", log_lvl, name, fn, desc);
 }
 
 static void pr_cont_pool_info(struct worker_pool *pool)
@@ -4579,6 +4580,30 @@ void show_workqueue_state(void)
 	rcu_read_unlock_sched();
 }
 
+void wq_worker_cmdline(struct seq_file *m, struct task_struct *task)
+{
+	struct worker *worker;
+	struct worker_pool *pool;
+
+	mutex_lock(&wq_pool_attach_mutex);
+
+	worker = kthread_data(task);
+	pool = worker->pool;
+
+	if (pool) {
+		spin_lock_irq(&pool->lock);
+		if (worker->desc[0] != '\0') {
+			const char *fmt = " %s";
+			if (worker->current_work)
+				fmt = " <%s>";
+			seq_printf(m, fmt, worker->desc);
+		}
+		spin_unlock_irq(&pool->lock);
+	}
+
+	mutex_unlock(&wq_pool_attach_mutex);
+}
+
 /*
  * CPU hotplug.
  *
@@ -4600,7 +4625,7 @@ static void unbind_workers(int cpu)
 	struct worker *worker;
 
 	for_each_cpu_worker_pool(pool, cpu) {
-		mutex_lock(&pool->attach_mutex);
+		mutex_lock(&wq_pool_attach_mutex);
 		spin_lock_irq(&pool->lock);
 
 		/*
@@ -4616,7 +4641,7 @@ static void unbind_workers(int cpu)
 		pool->flags |= POOL_DISASSOCIATED;
 
 		spin_unlock_irq(&pool->lock);
-		mutex_unlock(&pool->attach_mutex);
+		mutex_unlock(&wq_pool_attach_mutex);
 
 		/*
 		 * Call schedule() so that we cross rq->lock and thus can
@@ -4657,7 +4682,7 @@ static void rebind_workers(struct worker_pool *pool)
 {
 	struct worker *worker;
 
-	lockdep_assert_held(&pool->attach_mutex);
+	lockdep_assert_held(&wq_pool_attach_mutex);
 
 	/*
 	 * Restore CPU affinity of all workers.  As all idle workers should
@@ -4727,7 +4752,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
 	static cpumask_t cpumask;
 	struct worker *worker;
 
-	lockdep_assert_held(&pool->attach_mutex);
+	lockdep_assert_held(&wq_pool_attach_mutex);
 
 	/* is @cpu allowed for @pool? */
 	if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
@@ -4762,14 +4787,14 @@ int workqueue_online_cpu(unsigned int cpu)
 	mutex_lock(&wq_pool_mutex);
 
 	for_each_pool(pool, pi) {
-		mutex_lock(&pool->attach_mutex);
+		mutex_lock(&wq_pool_attach_mutex);
 
 		if (pool->cpu == cpu)
 			rebind_workers(pool);
 		else if (pool->cpu < 0)
 			restore_unbound_workers_cpumask(pool, cpu);
 
-		mutex_unlock(&pool->attach_mutex);
+		mutex_unlock(&wq_pool_attach_mutex);
 	}
 
 	/* update NUMA affinity of unbound workqueues */
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index d390d1be3748..feffcc2aa9f7 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -31,7 +31,6 @@ struct worker {
 	struct work_struct	*current_work;	/* L: work being processed */
 	work_func_t		current_func;	/* L: current_work's fn */
 	struct pool_workqueue	*current_pwq; /* L: current_work's pwq */
-	bool			desc_valid;	/* ->desc is valid */
 	struct list_head	scheduled;	/* L: scheduled works */
 
 	/* 64 bytes boundary on 64bit, 32 on 32bit */

[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux