Re: [RFC] better visibility into kworkers

On Thu, May 17, 2018 at 5:18 AM, Tejun Heo <tj@xxxxxxxxxx> wrote:
> Hello, Linus.
>
> On Wed, May 16, 2018 at 01:13:03PM -0700, Linus Torvalds wrote:
>> Some testing shows that 'ps' on my system gets the name from
>> /proc/<pid>/status, but annoyingly truncates it to 16 bytes (TASK_COMM_LEN)
>> even though the name line can obviously be much longer.
>
> Yeah, seeing the same thing.
>
>> Tejun, this patch is obviously just your original patch, modified to use a
>> seqfile (which really simplifies things anyway), and then changed to use
>> /proc/comm and /proc/status instead of /proc/cmdline.
>>
>> Comments? We could do the kernel change with the hope that some day procps
>> is ok with bigger names, even if it doesn't show them today.
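
FWIW, a tool that wants the untruncated name can already read the whole
Name: line itself.  A minimal userspace sketch (not what procps does
today, just an illustration):

  #include <stdio.h>
  #include <string.h>

  /* Read the full "Name:" line (the first line) of /proc/<pid>/status,
   * without truncating at TASK_COMM_LEN. */
  static int read_full_name(int pid, char *name, size_t size)
  {
          char path[64], line[256];
          FILE *f;
          int ret = -1;

          snprintf(path, sizeof(path), "/proc/%d/status", pid);
          f = fopen(path, "r");
          if (!f)
                  return -1;

          if (fgets(line, sizeof(line), f) && !strncmp(line, "Name:\t", 6)) {
                  line[strcspn(line, "\n")] = '\0';
                  snprintf(name, size, "%s", line + 6);
                  ret = 0;
          }
          fclose(f);
          return ret;
  }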
>
> I was doing the same thing.  The patch below makes all three - comm,
> stat and status show the same thing.
>
>   # cat /proc/1000/status | grep Name
>   Name:   kworker/14:1H-kblockd
>   # cat /proc/1000/stat
>   1000 (kworker/14:1H-kblockd) I 2 0 0 0 -1 69238880 0 0 0 0 0 0 0 0 0 -20 1 0 444 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 14 0 0 0 0 0 0 0 0 0 0 0 0 0
>   # cat /proc/1000/comm
>   kworker/14:1H-kblockd
>
> I switched from the space and <> to + for currently running and - for
> the last workqueue.  It's visually a bit busier but likely kinder to
> scripts which aren't expecting whitespace in comm.
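
For context: a stat parser that follows the documented format already
copes with unusual characters in comm by scanning for the last ')'; it is
only scripts that split blindly on whitespace that would get confused.
Roughly (an assumption about how such tools typically work, not taken
from procps):

  #include <stdio.h>
  #include <string.h>

  /* comm in /proc/<pid>/stat is everything between the first '(' and the
   * *last* ')', so '+', '-' or even spaces inside the name still parse. */
  static int read_stat_comm(const char *stat_line, char *comm, size_t size)
  {
          const char *open = strchr(stat_line, '(');
          const char *close = strrchr(stat_line, ')');

          if (!open || !close || close <= open)
                  return -1;

          snprintf(comm, size, "%.*s", (int)(close - open - 1), open + 1);
          return 0;
  }

For the stat line in the example above this would yield
"kworker/14:1H-kblockd".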
>
> Other than the ps truncation, it seems to work fine.  If this looks
> good, I'll prep the patches and route them through the wq tree.
>
> Thanks.
>
> Index: work/fs/proc/array.c
> ===================================================================
> --- work.orig/fs/proc/array.c
> +++ work/fs/proc/array.c
> @@ -95,22 +95,29 @@
>  #include <asm/processor.h>
>  #include "internal.h"
>
> -static inline void task_name(struct seq_file *m, struct task_struct *p)
> +void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape)
>  {
>         char *buf;
>         size_t size;
> -       char tcomm[sizeof(p->comm)];
> +       char tcomm[64];
>         int ret;
>
> -       get_task_comm(tcomm, p);
> -
> -       seq_puts(m, "Name:\t");
> +       if (p->flags & PF_WQ_WORKER)
> +               wq_worker_comm(tcomm, sizeof(tcomm), p);
> +       else
> +               __get_task_comm(tcomm, sizeof(tcomm), p);
>
>         size = seq_get_buf(m, &buf);
> -       ret = string_escape_str(tcomm, buf, size, ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
> -       seq_commit(m, ret < size ? ret : -1);
> +       if (escape) {
> +               ret = string_escape_str(tcomm, buf, size,
> +                                       ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
> +               if (ret >= size)
> +                       ret = -1;
> +       } else {
> +               ret = strscpy(buf, tcomm, size);
> +       }
>
> -       seq_putc(m, '\n');
> +       seq_commit(m, ret);
>  }
>
>  /*
> @@ -365,7 +372,10 @@ int proc_pid_status(struct seq_file *m,
>  {
>         struct mm_struct *mm = get_task_mm(task);
>
> -       task_name(m, task);
> +       seq_puts(m, "Name:\t");
> +       proc_task_name(m, task, true);
> +       seq_putc(m, '\n');
> +
>         task_state(m, ns, pid, task);
>
>         if (mm) {
> @@ -400,7 +410,6 @@ static int do_task_stat(struct seq_file
>         u64 cutime, cstime, utime, stime;
>         u64 cgtime, gtime;
>         unsigned long rsslim = 0;
> -       char tcomm[sizeof(task->comm)];
>         unsigned long flags;
>
>         state = *get_task_state(task);
> @@ -427,8 +436,6 @@ static int do_task_stat(struct seq_file
>                 }
>         }
>
> -       get_task_comm(tcomm, task);
> -
>         sigemptyset(&sigign);
>         sigemptyset(&sigcatch);
>         cutime = cstime = utime = stime = 0;
> @@ -495,7 +502,7 @@ static int do_task_stat(struct seq_file
>
>         seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns));
>         seq_puts(m, " (");
> -       seq_puts(m, tcomm);
> +       proc_task_name(m, task, false);
>         seq_puts(m, ") ");
>         seq_putc(m, state);
>         seq_put_decimal_ll(m, " ", ppid);
> Index: work/fs/proc/base.c
> ===================================================================
> --- work.orig/fs/proc/base.c
> +++ work/fs/proc/base.c
> @@ -223,6 +223,7 @@ static ssize_t proc_pid_cmdline_read(str
>         tsk = get_proc_task(file_inode(file));
>         if (!tsk)
>                 return -ESRCH;
> +
>         mm = get_task_mm(tsk);
>         put_task_struct(tsk);
>         if (!mm)
> @@ -1565,12 +1566,8 @@ static int comm_show(struct seq_file *m,
>         if (!p)
>                 return -ESRCH;
>
> -       task_lock(p);
> -       seq_printf(m, "%s\n", p->comm);
> -       task_unlock(p);
> -
> +       proc_task_name(m, p, false);
>         put_task_struct(p);
> -
>         return 0;
>  }
>
> Index: work/fs/proc/internal.h
> ===================================================================
> --- work.orig/fs/proc/internal.h
> +++ work/fs/proc/internal.h
> @@ -131,6 +131,8 @@ unsigned name_to_int(const struct qstr *
>   */
>  extern const struct file_operations proc_tid_children_operations;
>
> +extern void proc_task_name(struct seq_file *m, struct task_struct *p,
> +                          bool escape);
>  extern int proc_tid_stat(struct seq_file *, struct pid_namespace *,
>                          struct pid *, struct task_struct *);
>  extern int proc_tgid_stat(struct seq_file *, struct pid_namespace *,
> Index: work/kernel/workqueue.c
> ===================================================================
> --- work.orig/kernel/workqueue.c
> +++ work/kernel/workqueue.c
> @@ -66,7 +66,7 @@ enum {
>          * be executing on any CPU.  The pool behaves as an unbound one.
>          *
>          * Note that DISASSOCIATED should be flipped only while holding
> -        * attach_mutex to avoid changing binding state while
> +        * wq_pool_attach_mutex to avoid changing binding state while
>          * worker_attach_to_pool() is in progress.
>          */
>         POOL_MANAGER_ACTIVE     = 1 << 0,       /* being managed */
> @@ -123,7 +123,7 @@ enum {
>   *    cpu or grabbing pool->lock is enough for read access.  If
>   *    POOL_DISASSOCIATED is set, it's identical to L.
>   *
> - * A: pool->attach_mutex protected.
> + * A: wq_pool_attach_mutex protected.
>   *
>   * PL: wq_pool_mutex protected.
>   *
> @@ -166,7 +166,6 @@ struct worker_pool {
>                                                 /* L: hash of busy workers */
>
>         struct worker           *manager;       /* L: purely informational */
> -       struct mutex            attach_mutex;   /* attach/detach exclusion */
>         struct list_head        workers;        /* A: attached workers */
>         struct completion       *detach_completion; /* all workers detached */
>
> @@ -297,6 +296,7 @@ static bool wq_numa_enabled;                /* unbound
>  static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
>
>  static DEFINE_MUTEX(wq_pool_mutex);    /* protects pools and workqueues list */
> +static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
>  static DEFINE_SPINLOCK(wq_mayday_lock);        /* protects wq->maydays list */
>  static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
>
> @@ -399,14 +399,14 @@ static void workqueue_sysfs_unregister(s
>   * @worker: iteration cursor
>   * @pool: worker_pool to iterate workers of
>   *
> - * This must be called with @pool->attach_mutex.
> + * This must be called with wq_pool_attach_mutex.
>   *
>   * The if/else clause exists only for the lockdep assertion and can be
>   * ignored.
>   */
>  #define for_each_pool_worker(worker, pool)                             \
>         list_for_each_entry((worker), &(pool)->workers, node)           \
> -               if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \
> +               if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \
>                 else
>
>  /**
> @@ -1724,7 +1724,7 @@ static struct worker *alloc_worker(int n
>  static void worker_attach_to_pool(struct worker *worker,
>                                    struct worker_pool *pool)
>  {
> -       mutex_lock(&pool->attach_mutex);
> +       mutex_lock(&wq_pool_attach_mutex);
>
>         /*
>          * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
> @@ -1733,37 +1733,40 @@ static void worker_attach_to_pool(struct
>         set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
>
>         /*
> -        * The pool->attach_mutex ensures %POOL_DISASSOCIATED remains
> -        * stable across this function.  See the comments above the
> -        * flag definition for details.
> +        * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains
> +        * stable across this function.  See the comments above the flag
> +        * definition for details.
>          */
>         if (pool->flags & POOL_DISASSOCIATED)
>                 worker->flags |= WORKER_UNBOUND;
>
>         list_add_tail(&worker->node, &pool->workers);
> +       worker->pool = pool;
>
> -       mutex_unlock(&pool->attach_mutex);
> +       mutex_unlock(&wq_pool_attach_mutex);
>  }
>
>  /**
>   * worker_detach_from_pool() - detach a worker from its pool
>   * @worker: worker which is attached to its pool
> - * @pool: the pool @worker is attached to
>   *
>   * Undo the attaching which had been done in worker_attach_to_pool().  The
>   * caller worker shouldn't access to the pool after detached except it has
>   * other reference to the pool.
>   */
> -static void worker_detach_from_pool(struct worker *worker,
> -                                   struct worker_pool *pool)
> +static void worker_detach_from_pool(struct worker *worker)
>  {
> +       struct worker_pool *pool = worker->pool;
>         struct completion *detach_completion = NULL;
>
> -       mutex_lock(&pool->attach_mutex);
> +       mutex_lock(&wq_pool_attach_mutex);
> +
>         list_del(&worker->node);
> +       worker->pool = NULL;
> +
>         if (list_empty(&pool->workers))
>                 detach_completion = pool->detach_completion;
> -       mutex_unlock(&pool->attach_mutex);
> +       mutex_unlock(&wq_pool_attach_mutex);
>
>         /* clear leftover flags without pool->lock after it is detached */
>         worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
> @@ -1799,7 +1802,6 @@ static struct worker *create_worker(stru
>         if (!worker)
>                 goto fail;
>
> -       worker->pool = pool;
>         worker->id = id;
>
>         if (pool->cpu >= 0)
> @@ -2086,6 +2088,12 @@ __acquires(&pool->lock)
>         worker->current_pwq = pwq;
>         work_color = get_work_color(work);
>
> +       /*
> +        * Record wq name for cmdline and debug reporting, may get
> +        * overridden through set_worker_desc().
> +        */
> +       strlcpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);
> +
>         list_del_init(&work->entry);
>
>         /*
> @@ -2181,7 +2189,6 @@ __acquires(&pool->lock)
>         worker->current_work = NULL;
>         worker->current_func = NULL;
>         worker->current_pwq = NULL;
> -       worker->desc_valid = false;
>         pwq_dec_nr_in_flight(pwq, work_color);
>  }
>
> @@ -2236,7 +2243,7 @@ woke_up:
>
>                 set_task_comm(worker->task, "kworker/dying");
>                 ida_simple_remove(&pool->worker_ida, worker->id);
> -               worker_detach_from_pool(worker, pool);
> +               worker_detach_from_pool(worker);
>                 kfree(worker);
>                 return 0;
>         }
> @@ -2367,7 +2374,6 @@ repeat:
>                 worker_attach_to_pool(rescuer, pool);
>
>                 spin_lock_irq(&pool->lock);
> -               rescuer->pool = pool;
>
>                 /*
>                  * Slurp in all works issued via this workqueue and
> @@ -2417,10 +2423,9 @@ repeat:
>                 if (need_more_worker(pool))
>                         wake_up_worker(pool);
>
> -               rescuer->pool = NULL;
>                 spin_unlock_irq(&pool->lock);
>
> -               worker_detach_from_pool(rescuer, pool);
> +               worker_detach_from_pool(rescuer);
>
>                 spin_lock_irq(&wq_mayday_lock);
>         }
> @@ -3271,7 +3276,6 @@ static int init_worker_pool(struct worke
>
>         timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);
>
> -       mutex_init(&pool->attach_mutex);
>         INIT_LIST_HEAD(&pool->workers);
>
>         ida_init(&pool->worker_ida);
> @@ -3354,10 +3358,10 @@ static void put_unbound_pool(struct work
>         WARN_ON(pool->nr_workers || pool->nr_idle);
>         spin_unlock_irq(&pool->lock);
>
> -       mutex_lock(&pool->attach_mutex);
> +       mutex_lock(&wq_pool_attach_mutex);
>         if (!list_empty(&pool->workers))
>                 pool->detach_completion = &detach_completion;
> -       mutex_unlock(&pool->attach_mutex);
> +       mutex_unlock(&wq_pool_attach_mutex);
>
>         if (pool->detach_completion)
>                 wait_for_completion(pool->detach_completion);
> @@ -4347,7 +4351,6 @@ void set_worker_desc(const char *fmt, ..
>                 va_start(args, fmt);
>                 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
>                 va_end(args);
> -               worker->desc_valid = true;
>         }
>  }
>
> @@ -4369,9 +4372,9 @@ void print_worker_info(const char *log_l
>         work_func_t *fn = NULL;
>         char name[WQ_NAME_LEN] = { };
>         char desc[WORKER_DESC_LEN] = { };
> +       struct work_struct *work = NULL;
>         struct pool_workqueue *pwq = NULL;
>         struct workqueue_struct *wq = NULL;
> -       bool desc_valid = false;
>         struct worker *worker;
>
>         if (!(task->flags & PF_WQ_WORKER))
> @@ -4383,6 +4386,11 @@ void print_worker_info(const char *log_l
>          */
>         worker = kthread_probe_data(task);
>
> +       /* skip idle workers */
> +       probe_kernel_read(&work, &worker->current_work, sizeof(work));
> +       if (!work)
> +               return;
> +
>         /*
>          * Carefully copy the associated workqueue's workfn and name.  Keep
>          * the original last '\0' in case the original contains garbage.
> @@ -4393,16 +4401,9 @@ void print_worker_info(const char *log_l
>         probe_kernel_read(name, wq->name, sizeof(name) - 1);
>
>         /* copy worker description */
> -       probe_kernel_read(&desc_valid, &worker->desc_valid, sizeof(desc_valid));
> -       if (desc_valid)
> -               probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
> -
> -       if (fn || name[0] || desc[0]) {
> -               printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
> -               if (desc[0])
> -                       pr_cont(" (%s)", desc);
> -               pr_cont("\n");
> -       }
> +       probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
> +
> +       printk("%sWorkqueue: %s %pf (%s)\n", log_lvl, name, fn, desc);
>  }
>
>  static void pr_cont_pool_info(struct worker_pool *pool)
> @@ -4579,6 +4580,34 @@ void show_workqueue_state(void)
>         rcu_read_unlock_sched();
>  }
>
> +void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
> +{
> +       struct worker *worker;
> +       struct worker_pool *pool;
> +
> +       mutex_lock(&wq_pool_attach_mutex);
> +

Need to recheck task->flags & PF_WQ_WORKER again, because @task is often
not the current task, and the worker returned by kthread_data(task) could
already have been freed when that worker was destroyed.

But rechecking task->flags & PF_WQ_WORKER might not be enough for the
rescuer thread; it's hard to say.  Maybe some changes to rescuer_thread()
are needed, or rcu_sched is needed here.

BTW, the comment documenting worker->pool needs to be updated, since it is
now protected by wq_pool_attach_mutex.
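
For example, in workqueue_internal.h (just a sketch, reusing the "A:"
annotation the patch adds to the locking legend in workqueue.c):

        struct worker_pool      *pool;          /* A: the associated pool */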

BTW, is it possible to keep attach_mutex unchanged?  rcu_sched might be
enough, something like:

rcu_read_lock_sched();
if (task->flags & PF_WQ_WORKER) {
	worker = kthread_data(task);
	pool = READ_ONCE(worker->pool);
	if (pool) {
		spin_lock_irq(&pool->lock);
		if (worker->pool == pool) {
			/* pool can't be freed while under rcu_read_lock_sched() */
			/* build the name from task->comm and worker->desc here */
		}
		spin_unlock_irq(&pool->lock);
	}
}
rcu_read_unlock_sched();

I'm not sure.


> +       worker = kthread_data(task);
> +       pool = worker->pool;
> +
> +       if (pool) {
> +               spin_lock_irq(&pool->lock);
> +               if (worker->desc[0] != '\0') {
> +                       if (worker->current_work)
> +                               snprintf(buf, size, "%s+%s",
> +                                        task->comm, worker->desc);
> +                       else
> +                               snprintf(buf, size, "%s-%s",
> +                                        task->comm, worker->desc);
> +               }
> +               spin_unlock_irq(&pool->lock);
> +       } else {
> +               snprintf(buf, size, "%s", task->comm);
> +       }
> +
> +       mutex_unlock(&wq_pool_attach_mutex);
> +}
> +
>  /*
>   * CPU hotplug.
>   *
> @@ -4600,7 +4629,7 @@ static void unbind_workers(int cpu)
>         struct worker *worker;
>
>         for_each_cpu_worker_pool(pool, cpu) {
> -               mutex_lock(&pool->attach_mutex);
> +               mutex_lock(&wq_pool_attach_mutex);
>                 spin_lock_irq(&pool->lock);
>
>                 /*
> @@ -4616,7 +4645,7 @@ static void unbind_workers(int cpu)
>                 pool->flags |= POOL_DISASSOCIATED;
>
>                 spin_unlock_irq(&pool->lock);
> -               mutex_unlock(&pool->attach_mutex);
> +               mutex_unlock(&wq_pool_attach_mutex);
>
>                 /*
>                  * Call schedule() so that we cross rq->lock and thus can
> @@ -4657,7 +4686,7 @@ static void rebind_workers(struct worker
>  {
>         struct worker *worker;
>
> -       lockdep_assert_held(&pool->attach_mutex);
> +       lockdep_assert_held(&wq_pool_attach_mutex);
>
>         /*
>          * Restore CPU affinity of all workers.  As all idle workers should
> @@ -4727,7 +4756,7 @@ static void restore_unbound_workers_cpum
>         static cpumask_t cpumask;
>         struct worker *worker;
>
> -       lockdep_assert_held(&pool->attach_mutex);
> +       lockdep_assert_held(&wq_pool_attach_mutex);
>
>         /* is @cpu allowed for @pool? */
>         if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
> @@ -4762,14 +4791,14 @@ int workqueue_online_cpu(unsigned int cp
>         mutex_lock(&wq_pool_mutex);
>
>         for_each_pool(pool, pi) {
> -               mutex_lock(&pool->attach_mutex);
> +               mutex_lock(&wq_pool_attach_mutex);
>
>                 if (pool->cpu == cpu)
>                         rebind_workers(pool);
>                 else if (pool->cpu < 0)
>                         restore_unbound_workers_cpumask(pool, cpu);
>
> -               mutex_unlock(&pool->attach_mutex);
> +               mutex_unlock(&wq_pool_attach_mutex);
>         }
>
>         /* update NUMA affinity of unbound workqueues */
> Index: work/kernel/workqueue_internal.h
> ===================================================================
> --- work.orig/kernel/workqueue_internal.h
> +++ work/kernel/workqueue_internal.h
> @@ -31,7 +31,6 @@ struct worker {
>         struct work_struct      *current_work;  /* L: work being processed */
>         work_func_t             current_func;   /* L: current_work's fn */
>         struct pool_workqueue   *current_pwq; /* L: current_work's pwq */
> -       bool                    desc_valid;     /* ->desc is valid */
>         struct list_head        scheduled;      /* L: scheduled works */
>
>         /* 64 bytes boundary on 64bit, 32 on 32bit */
> Index: work/include/linux/workqueue.h
> ===================================================================
> --- work.orig/include/linux/workqueue.h
> +++ work/include/linux/workqueue.h
> @@ -494,6 +494,7 @@ extern unsigned int work_busy(struct wor
>  extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
>  extern void print_worker_info(const char *log_lvl, struct task_struct *task);
>  extern void show_workqueue_state(void);
> +extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task);
>
>  /**
>   * queue_work - queue work on a workqueue