v4.14.292-rt138-rc1 stable review patch. If anyone has any objections, please let me know.

-----------

The recent changes to the random subsystem require that the workqueue code in v4.14-rt be updated. Given the code proximity, v4.19.255-rt113 was selected as the target.

Included in this patch:
 - convert struct worker_pool lock and wq_mayday_lock to raw_spinlock_t
 - backport swait_event_lock_irq() macros
 - misc workqueue changes from v4.19.255-rt113

Files touched:
 include/linux/swait.h
 include/linux/workqueue.h
 kernel/workqueue.c

Signed-off-by: Luis Claudio R. Goncalves <lgoncalv@xxxxxxxxxx>
---
Notes:
    This kernel works fine (tested with LTP and stress-ng, both with and without lockdep enabled). But on poweroff or reboot there are three backtraces coming from the commit listed below, part of the all-in-one "workqueue: update code to match v4.19.255-rt113" patch:

        8c37f7c23c02f workqueue: Try to catch flush_work() without INIT_WORK().

    This commit translates to the following statement in __flush_work(), from kernel/workqueue.c:

        if (WARN_ON(!work->func))
                return false;

    Should I remove this change? The fix requires a good number of changes and takes us farther away from stock v4.14 in a dangerous way.

    And an even more important question: is this all-in-one patch acceptable? Should I backport all the individual commits instead?

 include/linux/swait.h     |  14 +
 include/linux/workqueue.h |  10 +-
 kernel/workqueue.c        | 623 ++++++++++++++++++++------------------
 3 files changed, 338 insertions(+), 309 deletions(-)

diff --git a/include/linux/swait.h b/include/linux/swait.h index 853f3e61a9f4c..d41917e222d1b 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -286,4 +286,18 @@ do { \ __ret; \ }) +#define __swait_event_lock_irq(wq, condition, lock, cmd) \ + ___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \ + raw_spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ + raw_spin_lock_irq(&lock)) + +#define swait_event_lock_irq(wq_head, condition, lock) \ + do { \ + if (condition) \ + break; \ + __swait_event_lock_irq(wq_head, condition, lock, ); \ + } while (0) + #endif /* _LINUX_SWAIT_H */ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 1c527abb1ae58..bbb14d6b6ba38 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -219,7 +219,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ - lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0); \ + lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, &__key, 0); \ INIT_LIST_HEAD(&(_work)->entry); \ (_work)->func = (_func); \ } while (0) @@ -399,7 +399,7 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, static struct lock_class_key __key; \ const char *__lock_name; \ \ - __lock_name = #fmt#args; \ + __lock_name = "(wq_completion)"#fmt#args; \ \ __alloc_workqueue_key((fmt), (flags), (max_active), \ &__key, __lock_name, ##args); \ @@ -437,10 +437,6 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, extern void destroy_workqueue(struct workqueue_struct *wq); -struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask); -void free_workqueue_attrs(struct workqueue_attrs *attrs); -int apply_workqueue_attrs(struct workqueue_struct *wq, - const struct workqueue_attrs *attrs); int workqueue_set_unbound_cpumask(cpumask_var_t cpumask); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, @@ -458,7 +454,6 @@ extern int
schedule_on_each_cpu(work_func_t func); int execute_in_process_context(work_func_t fn, struct execute_work *); extern bool flush_work(struct work_struct *work); -extern bool cancel_work(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); extern bool flush_delayed_work(struct delayed_work *dwork); @@ -474,6 +469,7 @@ extern unsigned int work_busy(struct work_struct *work); extern __printf(1, 2) void set_worker_desc(const char *fmt, ...); extern void print_worker_info(const char *log_lvl, struct task_struct *task); extern void show_workqueue_state(void); +extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task); /** * queue_work - queue work on a workqueue diff --git a/kernel/workqueue.c b/kernel/workqueue.c index bc526b793e838..d52953d01ba29 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -38,7 +38,6 @@ #include <linux/hardirq.h> #include <linux/mempolicy.h> #include <linux/freezer.h> -#include <linux/kallsyms.h> #include <linux/debug_locks.h> #include <linux/lockdep.h> #include <linux/idr.h> @@ -49,8 +48,6 @@ #include <linux/moduleparam.h> #include <linux/uaccess.h> #include <linux/nmi.h> -#include <linux/locallock.h> -#include <linux/delay.h> #include <linux/kvm_para.h> #include "workqueue_internal.h" @@ -69,7 +66,7 @@ enum { * be executing on any CPU. The pool behaves as an unbound one. * * Note that DISASSOCIATED should be flipped only while holding - * attach_mutex to avoid changing binding state while + * wq_pool_attach_mutex to avoid changing binding state while * worker_attach_to_pool() is in progress. */ POOL_MANAGER_ACTIVE = 1 << 0, /* being managed */ @@ -126,12 +123,7 @@ enum { * cpu or grabbing pool->lock is enough for read access. If * POOL_DISASSOCIATED is set, it's identical to L. * - * On RT we need the extra protection via rt_lock_idle_list() for - * the list manipulations against read access from - * wq_worker_sleeping(). All other places are nicely serialized via - * pool->lock. - * - * A: pool->attach_mutex protected. + * A: wq_pool_attach_mutex protected. * * PL: wq_pool_mutex protected. 
* @@ -152,7 +144,7 @@ enum { /* struct worker is defined in workqueue_internal.h */ struct worker_pool { - spinlock_t lock; /* the pool lock */ + raw_spinlock_t lock; /* the pool lock */ int cpu; /* I: the associated cpu */ int node; /* I: the associated node ID */ int id; /* I: pool ID */ @@ -161,10 +153,9 @@ struct worker_pool { unsigned long watchdog_ts; /* L: watchdog timestamp */ struct list_head worklist; /* L: list of pending works */ - int nr_workers; /* L: total number of workers */ - /* nr_idle includes the ones off idle_list for rebinding */ - int nr_idle; /* L: currently idle ones */ + int nr_workers; /* L: total number of workers */ + int nr_idle; /* L: currently idle workers */ struct list_head idle_list; /* X: list of idle workers */ struct timer_list idle_timer; /* L: worker idle timeout */ @@ -174,9 +165,7 @@ struct worker_pool { DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER); /* L: hash of busy workers */ - /* see manage_workers() for details on the two manager mutexes */ struct worker *manager; /* L: purely informational */ - struct mutex attach_mutex; /* attach/detach exclusion */ struct list_head workers; /* A: attached workers */ struct completion *detach_completion; /* all workers detached */ @@ -307,8 +296,9 @@ static bool wq_numa_enabled; /* unbound NUMA affinity enabled */ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf; static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ -static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ -static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */ +static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */ +static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ +static DECLARE_SWAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */ static LIST_HEAD(workqueues); /* PR: list of all workqueues */ static bool workqueue_freezing; /* PL: have wqs started freezing? */ @@ -360,8 +350,6 @@ EXPORT_SYMBOL_GPL(system_power_efficient_wq); struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly; EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); -static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock); - static int worker_thread(void *__worker); static void workqueue_sysfs_unregister(struct workqueue_struct *wq); @@ -411,14 +399,14 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); * @worker: iteration cursor * @pool: worker_pool to iterate workers of * - * This must be called with @pool->attach_mutex. + * This must be called with wq_pool_attach_mutex. * * The if/else clause exists only for the lockdep assertion and can be * ignored. 
*/ #define for_each_pool_worker(worker, pool) \ list_for_each_entry((worker), &(pool)->workers, node) \ - if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \ + if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \ else /** @@ -438,31 +426,6 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \ else -#ifdef CONFIG_PREEMPT_RT_BASE -static inline void rt_lock_idle_list(struct worker_pool *pool) -{ - preempt_disable(); -} -static inline void rt_unlock_idle_list(struct worker_pool *pool) -{ - preempt_enable(); -} -static inline void sched_lock_idle_list(struct worker_pool *pool) { } -static inline void sched_unlock_idle_list(struct worker_pool *pool) { } -#else -static inline void rt_lock_idle_list(struct worker_pool *pool) { } -static inline void rt_unlock_idle_list(struct worker_pool *pool) { } -static inline void sched_lock_idle_list(struct worker_pool *pool) -{ - spin_lock_irq(&pool->lock); -} -static inline void sched_unlock_idle_list(struct worker_pool *pool) -{ - spin_unlock_irq(&pool->lock); -} -#endif - - #ifdef CONFIG_DEBUG_OBJECTS_WORK static struct debug_obj_descr work_debug_descr; @@ -865,20 +828,14 @@ static struct worker *first_idle_worker(struct worker_pool *pool) * Wake up the first idle worker of @pool. * * CONTEXT: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). */ static void wake_up_worker(struct worker_pool *pool) { - struct worker *worker; - - rt_lock_idle_list(pool); - - worker = first_idle_worker(pool); + struct worker *worker = first_idle_worker(pool); if (likely(worker)) wake_up_process(worker->task); - - rt_unlock_idle_list(pool); } /** @@ -907,7 +864,7 @@ void wq_worker_running(struct task_struct *task) */ void wq_worker_sleeping(struct task_struct *task) { - struct worker *worker = kthread_data(task); + struct worker *next, *worker = kthread_data(task); struct worker_pool *pool; /* @@ -924,18 +881,26 @@ void wq_worker_sleeping(struct task_struct *task) return; worker->sleeping = 1; + raw_spin_lock_irq(&pool->lock); /* * The counterpart of the following dec_and_test, implied mb, * worklist not empty test sequence is in insert_work(). * Please read comment there. + * + * NOT_RUNNING is clear. This means that we're bound to and + * running on the local cpu w/ rq lock held and preemption + * disabled, which in turn means that none else could be + * manipulating idle_list, so dereferencing idle_list without pool + * lock is safe. */ if (atomic_dec_and_test(&pool->nr_running) && !list_empty(&pool->worklist)) { - sched_lock_idle_list(pool); - wake_up_worker(pool); - sched_unlock_idle_list(pool); + next = first_idle_worker(pool); + if (next) + wake_up_process(next->task); } + raw_spin_unlock_irq(&pool->lock); } /** @@ -946,7 +911,7 @@ void wq_worker_sleeping(struct task_struct *task) * Set @flags in @worker->flags and adjust nr_running accordingly. * * CONTEXT: - * spin_lock_irq(pool->lock) + * raw_spin_lock_irq(pool->lock) */ static inline void worker_set_flags(struct worker *worker, unsigned int flags) { @@ -971,7 +936,7 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags) * Clear @flags in @worker->flags and adjust nr_running accordingly. 
* * CONTEXT: - * spin_lock_irq(pool->lock) + * raw_spin_lock_irq(pool->lock) */ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) { @@ -1019,7 +984,7 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) * actually occurs, it should be easy to locate the culprit work function. * * CONTEXT: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). * * Return: * Pointer to worker which is executing @work if found, %NULL @@ -1054,7 +1019,7 @@ static struct worker *find_worker_executing_work(struct worker_pool *pool, * nested inside outer list_for_each_entry_safe(). * * CONTEXT: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). */ static void move_linked_works(struct work_struct *work, struct list_head *head, struct work_struct **nextp) @@ -1132,11 +1097,9 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq) * As both pwqs and pools are RCU protected, the * following lock operations are safe. */ - rcu_read_lock(); - local_spin_lock_irq(pendingb_lock, &pwq->pool->lock); + raw_spin_lock_irq(&pwq->pool->lock); put_pwq(pwq); - local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock); - rcu_read_unlock(); + raw_spin_unlock_irq(&pwq->pool->lock); } } @@ -1169,7 +1132,7 @@ static void pwq_activate_first_delayed(struct pool_workqueue *pwq) * decrement nr_in_flight of its pwq and handle workqueue flushing. * * CONTEXT: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). */ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color) { @@ -1240,7 +1203,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, struct worker_pool *pool; struct pool_workqueue *pwq; - local_lock_irqsave(pendingb_lock, *flags); + local_irq_save(*flags); /* try to steal the timer if it exists */ if (is_dwork) { @@ -1268,7 +1231,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, if (!pool) goto fail; - spin_lock(&pool->lock); + raw_spin_lock(&pool->lock); /* * work->data is guaranteed to point to pwq only while the work * item is queued on pwq->wq, and both updating work->data to point @@ -1297,17 +1260,17 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, /* work->data points to pwq iff queued, point to pool */ set_work_pool_and_keep_pending(work, pool->id); - spin_unlock(&pool->lock); + raw_spin_unlock(&pool->lock); rcu_read_unlock(); return 1; } - spin_unlock(&pool->lock); + raw_spin_unlock(&pool->lock); fail: rcu_read_unlock(); - local_unlock_irqrestore(pendingb_lock, *flags); + local_irq_restore(*flags); if (work_is_canceling(work)) return -ENOENT; - cpu_chill(); + cpu_relax(); return -EAGAIN; } @@ -1322,7 +1285,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, * work_struct flags. * * CONTEXT: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). */ static void insert_work(struct pool_workqueue *pwq, struct work_struct *work, struct list_head *head, unsigned int extra_flags) @@ -1409,7 +1372,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, * queued or lose PENDING. Grabbing PENDING and queueing should * happen with IRQ disabled. 
*/ - WARN_ON_ONCE_NONRT(!irqs_disabled()); + WARN_ON_ONCE(!irqs_disabled()); /* if draining, only works from the same workqueue are allowed */ @@ -1438,7 +1401,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, if (last_pool && last_pool != pwq->pool) { struct worker *worker; - spin_lock(&last_pool->lock); + raw_spin_lock(&last_pool->lock); worker = find_worker_executing_work(last_pool, work); @@ -1446,11 +1409,11 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, pwq = worker->current_pwq; } else { /* meh... not running there, queue here */ - spin_unlock(&last_pool->lock); - spin_lock(&pwq->pool->lock); + raw_spin_unlock(&last_pool->lock); + raw_spin_lock(&pwq->pool->lock); } } else { - spin_lock(&pwq->pool->lock); + raw_spin_lock(&pwq->pool->lock); } /* @@ -1463,7 +1426,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, */ if (unlikely(!pwq->refcnt)) { if (wq->flags & WQ_UNBOUND) { - spin_unlock(&pwq->pool->lock); + raw_spin_unlock(&pwq->pool->lock); cpu_relax(); goto retry; } @@ -1496,7 +1459,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, insert_work(pwq, work, worklist, work_flags); out: - spin_unlock(&pwq->pool->lock); + raw_spin_unlock(&pwq->pool->lock); rcu_read_unlock(); } @@ -1517,14 +1480,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, bool ret = false; unsigned long flags; - local_lock_irqsave(pendingb_lock,flags); + local_irq_save(flags); if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { __queue_work(cpu, wq, work); ret = true; } - local_unlock_irqrestore(pendingb_lock, flags); + local_irq_restore(flags); return ret; } EXPORT_SYMBOL(queue_work_on); @@ -1532,12 +1495,11 @@ EXPORT_SYMBOL(queue_work_on); void delayed_work_timer_fn(unsigned long __data) { struct delayed_work *dwork = (struct delayed_work *)__data; + unsigned long flags; - /* XXX */ - /* local_lock(pendingb_lock); */ - /* should have been called from irqsafe timer with irq already off */ + local_irq_save(flags); __queue_work(dwork->cpu, dwork->wq, &dwork->work); - /* local_unlock(pendingb_lock); */ + local_irq_restore(flags); } EXPORT_SYMBOL(delayed_work_timer_fn); @@ -1593,14 +1555,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, unsigned long flags; /* read the comment in __queue_work() */ - local_lock_irqsave(pendingb_lock, flags); + local_irq_save(flags); if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { __queue_delayed_work(cpu, wq, dwork, delay); ret = true; } - local_unlock_irqrestore(pendingb_lock, flags); + local_irq_restore(flags); return ret; } EXPORT_SYMBOL(queue_delayed_work_on); @@ -1635,7 +1597,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, if (likely(ret >= 0)) { __queue_delayed_work(cpu, wq, dwork, delay); - local_unlock_irqrestore(pendingb_lock, flags); + local_irq_restore(flags); } /* -ENOENT from try_to_grab_pending() becomes %true */ @@ -1651,7 +1613,7 @@ EXPORT_SYMBOL_GPL(mod_delayed_work_on); * necessary. * * LOCKING: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). */ static void worker_enter_idle(struct worker *worker) { @@ -1668,15 +1630,13 @@ static void worker_enter_idle(struct worker *worker) worker->last_active = jiffies; /* idle_list is LIFO */ - rt_lock_idle_list(pool); list_add(&worker->entry, &pool->idle_list); - rt_unlock_idle_list(pool); if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); /* - * Sanity check nr_running. 
Because wq_unbind_fn() releases + * Sanity check nr_running. Because unbind_workers() releases * pool->lock between setting %WORKER_UNBOUND and zapping * nr_running, the warning may trigger spuriously. Check iff * unbind is not in progress. @@ -1693,7 +1653,7 @@ static void worker_enter_idle(struct worker *worker) * @worker is leaving idle state. Update stats. * * LOCKING: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). */ static void worker_leave_idle(struct worker *worker) { @@ -1703,9 +1663,7 @@ static void worker_leave_idle(struct worker *worker) return; worker_clr_flags(worker, WORKER_IDLE); pool->nr_idle--; - rt_lock_idle_list(pool); list_del_init(&worker->entry); - rt_unlock_idle_list(pool); } static struct worker *alloc_worker(int node) @@ -1735,46 +1693,46 @@ static struct worker *alloc_worker(int node) static void worker_attach_to_pool(struct worker *worker, struct worker_pool *pool) { - mutex_lock(&pool->attach_mutex); + mutex_lock(&wq_pool_attach_mutex); /* - * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any - * online CPUs. It'll be re-applied when any of the CPUs come up. - */ - set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); - - /* - * The pool->attach_mutex ensures %POOL_DISASSOCIATED remains - * stable across this function. See the comments above the - * flag definition for details. + * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains + * stable across this function. See the comments above the flag + * definition for details. */ if (pool->flags & POOL_DISASSOCIATED) worker->flags |= WORKER_UNBOUND; + if (worker->rescue_wq) + set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); + list_add_tail(&worker->node, &pool->workers); + worker->pool = pool; - mutex_unlock(&pool->attach_mutex); + mutex_unlock(&wq_pool_attach_mutex); } /** * worker_detach_from_pool() - detach a worker from its pool * @worker: worker which is attached to its pool - * @pool: the pool @worker is attached to * * Undo the attaching which had been done in worker_attach_to_pool(). The * caller worker shouldn't access to the pool after detached except it has * other reference to the pool. */ -static void worker_detach_from_pool(struct worker *worker, - struct worker_pool *pool) +static void worker_detach_from_pool(struct worker *worker) { + struct worker_pool *pool = worker->pool; struct completion *detach_completion = NULL; - mutex_lock(&pool->attach_mutex); + mutex_lock(&wq_pool_attach_mutex); + list_del(&worker->node); + worker->pool = NULL; + if (list_empty(&pool->workers)) detach_completion = pool->detach_completion; - mutex_unlock(&pool->attach_mutex); + mutex_unlock(&wq_pool_attach_mutex); /* clear leftover flags without pool->lock after it is detached */ worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND); @@ -1810,7 +1768,6 @@ static struct worker *create_worker(struct worker_pool *pool) if (!worker) goto fail; - worker->pool = pool; worker->id = id; if (pool->cpu >= 0) @@ -1831,11 +1788,11 @@ static struct worker *create_worker(struct worker_pool *pool) worker_attach_to_pool(worker, pool); /* start the newly created worker */ - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); worker->pool->nr_workers++; worker_enter_idle(worker); wake_up_process(worker->task); - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); return worker; @@ -1854,7 +1811,7 @@ static struct worker *create_worker(struct worker_pool *pool) * be idle. * * CONTEXT: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). 
*/ static void destroy_worker(struct worker *worker) { @@ -1871,9 +1828,7 @@ static void destroy_worker(struct worker *worker) pool->nr_workers--; pool->nr_idle--; - rt_lock_idle_list(pool); list_del_init(&worker->entry); - rt_unlock_idle_list(pool); worker->flags |= WORKER_DIE; wake_up_process(worker->task); } @@ -1882,7 +1837,7 @@ static void idle_worker_timeout(unsigned long __pool) { struct worker_pool *pool = (void *)__pool; - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); while (too_many_workers(pool)) { struct worker *worker; @@ -1900,7 +1855,7 @@ static void idle_worker_timeout(unsigned long __pool) destroy_worker(worker); } - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); } static void send_mayday(struct work_struct *work) @@ -1931,8 +1886,8 @@ static void pool_mayday_timeout(unsigned long __pool) struct worker_pool *pool = (void *)__pool; struct work_struct *work; - spin_lock_irq(&pool->lock); - spin_lock(&wq_mayday_lock); /* for wq->maydays */ + raw_spin_lock_irq(&pool->lock); + raw_spin_lock(&wq_mayday_lock); /* for wq->maydays */ if (need_to_create_worker(pool)) { /* @@ -1945,8 +1900,8 @@ static void pool_mayday_timeout(unsigned long __pool) send_mayday(work); } - spin_unlock(&wq_mayday_lock); - spin_unlock_irq(&pool->lock); + raw_spin_unlock(&wq_mayday_lock); + raw_spin_unlock_irq(&pool->lock); mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL); } @@ -1965,7 +1920,7 @@ static void pool_mayday_timeout(unsigned long __pool) * may_start_working() %true. * * LOCKING: - * spin_lock_irq(pool->lock) which may be released and regrabbed + * raw_spin_lock_irq(pool->lock) which may be released and regrabbed * multiple times. Does GFP_KERNEL allocations. Called only from * manager. */ @@ -1974,7 +1929,7 @@ __releases(&pool->lock) __acquires(&pool->lock) { restart: - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */ mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); @@ -1990,7 +1945,7 @@ __acquires(&pool->lock) } del_timer_sync(&pool->mayday_timer); - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); /* * This is necessary even after a new worker was just successfully * created as @pool->lock was dropped and the new worker might have @@ -2013,7 +1968,7 @@ __acquires(&pool->lock) * and may_start_working() is true. * * CONTEXT: - * spin_lock_irq(pool->lock) which may be released and regrabbed + * raw_spin_lock_irq(pool->lock) which may be released and regrabbed * multiple times. Does GFP_KERNEL allocations. * * Return: @@ -2036,7 +1991,7 @@ static bool manage_workers(struct worker *worker) pool->manager = NULL; pool->flags &= ~POOL_MANAGER_ACTIVE; - wake_up(&wq_manager_wait); + swake_up(&wq_manager_wait); return true; } @@ -2052,7 +2007,7 @@ static bool manage_workers(struct worker *worker) * call this function to process a work. * * CONTEXT: - * spin_lock_irq(pool->lock) which is released and regrabbed. + * raw_spin_lock_irq(pool->lock) which is released and regrabbed. */ static void process_one_work(struct worker *worker, struct work_struct *work) __releases(&pool->lock) @@ -2099,6 +2054,12 @@ __acquires(&pool->lock) worker->current_pwq = pwq; work_color = get_work_color(work); + /* + * Record wq name for cmdline and debug reporting, may get + * overridden through set_worker_desc(). 
+ */ + strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN); + list_del_init(&work->entry); /* @@ -2128,7 +2089,7 @@ __acquires(&pool->lock) */ set_work_pool_and_clear_pending(work, pool->id); - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); lock_map_acquire(&pwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); @@ -2181,9 +2142,9 @@ __acquires(&pool->lock) * stop_machine. At the same time, report a quiescent RCU state so * the same condition doesn't freeze RCU. */ - cond_resched_rcu_qs(); + cond_resched(); - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); /* clear cpu intensive status */ if (unlikely(cpu_intensive)) @@ -2194,7 +2155,6 @@ __acquires(&pool->lock) worker->current_work = NULL; worker->current_func = NULL; worker->current_pwq = NULL; - worker->desc_valid = false; pwq_dec_nr_in_flight(pwq, work_color); } @@ -2207,7 +2167,7 @@ __acquires(&pool->lock) * fetches a work from the top and executes it. * * CONTEXT: - * spin_lock_irq(pool->lock) which may be released and regrabbed + * raw_spin_lock_irq(pool->lock) which may be released and regrabbed * multiple times. */ static void process_scheduled_works(struct worker *worker) @@ -2219,6 +2179,16 @@ static void process_scheduled_works(struct worker *worker) } } +static void set_pf_worker(bool val) +{ + mutex_lock(&wq_pool_attach_mutex); + if (val) + current->flags |= PF_WQ_WORKER; + else + current->flags &= ~PF_WQ_WORKER; + mutex_unlock(&wq_pool_attach_mutex); +} + /** * worker_thread - the worker thread function * @__worker: self @@ -2237,19 +2207,19 @@ static int worker_thread(void *__worker) struct worker_pool *pool = worker->pool; /* tell the scheduler that this is a workqueue worker */ - worker->task->flags |= PF_WQ_WORKER; + set_pf_worker(true); woke_up: - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); /* am I supposed to die? */ if (unlikely(worker->flags & WORKER_DIE)) { - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); WARN_ON_ONCE(!list_empty(&worker->entry)); - worker->task->flags &= ~PF_WQ_WORKER; + set_pf_worker(false); set_task_comm(worker->task, "kworker/dying"); ida_simple_remove(&pool->worker_ida, worker->id); - worker_detach_from_pool(worker, pool); + worker_detach_from_pool(worker); kfree(worker); return 0; } @@ -2309,7 +2279,7 @@ static int worker_thread(void *__worker) */ worker_enter_idle(worker); __set_current_state(TASK_IDLE); - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); schedule(); goto woke_up; } @@ -2348,7 +2318,7 @@ static int rescuer_thread(void *__rescuer) * Mark rescuer as worker too. As WORKER_PREP is never cleared, it * doesn't participate in concurrency management. 
*/ - rescuer->task->flags |= PF_WQ_WORKER; + set_pf_worker(true); repeat: set_current_state(TASK_IDLE); @@ -2363,7 +2333,7 @@ static int rescuer_thread(void *__rescuer) should_stop = kthread_should_stop(); /* see whether any pwq is asking for help */ - spin_lock_irq(&wq_mayday_lock); + raw_spin_lock_irq(&wq_mayday_lock); while (!list_empty(&wq->maydays)) { struct pool_workqueue *pwq = list_first_entry(&wq->maydays, @@ -2375,12 +2345,11 @@ static int rescuer_thread(void *__rescuer) __set_current_state(TASK_RUNNING); list_del_init(&pwq->mayday_node); - spin_unlock_irq(&wq_mayday_lock); + raw_spin_unlock_irq(&wq_mayday_lock); worker_attach_to_pool(rescuer, pool); - spin_lock_irq(&pool->lock); - rescuer->pool = pool; + raw_spin_lock_irq(&pool->lock); /* * Slurp in all works issued via this workqueue and @@ -2409,7 +2378,7 @@ static int rescuer_thread(void *__rescuer) * incur MAYDAY_INTERVAL delay inbetween. */ if (need_to_create_worker(pool)) { - spin_lock(&wq_mayday_lock); + raw_spin_lock(&wq_mayday_lock); /* * Queue iff we aren't racing destruction * and somebody else hasn't queued it already. @@ -2418,7 +2387,7 @@ static int rescuer_thread(void *__rescuer) get_pwq(pwq); list_add_tail(&pwq->mayday_node, &wq->maydays); } - spin_unlock(&wq_mayday_lock); + raw_spin_unlock(&wq_mayday_lock); } } @@ -2436,19 +2405,18 @@ static int rescuer_thread(void *__rescuer) if (need_more_worker(pool)) wake_up_worker(pool); - rescuer->pool = NULL; - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); - worker_detach_from_pool(rescuer, pool); + worker_detach_from_pool(rescuer); - spin_lock_irq(&wq_mayday_lock); + raw_spin_lock_irq(&wq_mayday_lock); } - spin_unlock_irq(&wq_mayday_lock); + raw_spin_unlock_irq(&wq_mayday_lock); if (should_stop) { __set_current_state(TASK_RUNNING); - rescuer->task->flags &= ~PF_WQ_WORKER; + set_pf_worker(false); return 0; } @@ -2524,7 +2492,7 @@ static void wq_barrier_func(struct work_struct *work) * underneath us, so we can't reliably determine pwq from @target. * * CONTEXT: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). 
*/ static void insert_wq_barrier(struct pool_workqueue *pwq, struct wq_barrier *barr, @@ -2551,6 +2519,7 @@ static void insert_wq_barrier(struct pool_workqueue *pwq, "(complete)wq_barr::done", target->lockdep_map.key, 1); __init_completion(&barr->done); + barr->task = current; /* @@ -2618,7 +2587,7 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, for_each_pwq(pwq, wq) { struct worker_pool *pool = pwq->pool; - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); if (flush_color >= 0) { WARN_ON_ONCE(pwq->flush_color != -1); @@ -2635,7 +2604,7 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, pwq->work_color = work_color; } - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); } if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush)) @@ -2835,9 +2804,9 @@ void drain_workqueue(struct workqueue_struct *wq) for_each_pwq(pwq, wq) { bool drained; - spin_lock_irq(&pwq->pool->lock); + raw_spin_lock_irq(&pwq->pool->lock); drained = !pwq->nr_active && list_empty(&pwq->delayed_works); - spin_unlock_irq(&pwq->pool->lock); + raw_spin_unlock_irq(&pwq->pool->lock); if (drained) continue; @@ -2857,7 +2826,8 @@ void drain_workqueue(struct workqueue_struct *wq) } EXPORT_SYMBOL_GPL(drain_workqueue); -static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) +static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, + bool from_cancel) { struct worker *worker = NULL; struct worker_pool *pool; @@ -2872,7 +2842,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) return false; } - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); /* see the comment in try_to_grab_pending() with the same code */ pwq = get_work_pwq(work); if (pwq) { @@ -2888,7 +2858,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) check_flush_dependency(pwq->wq, work); insert_wq_barrier(pwq, barr, work, worker); - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); /* * Force a lock recursion deadlock when using flush_work() inside a @@ -2899,40 +2869,35 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) * workqueues the deadlock happens when the rescuer stalls, blocking * forward progress. */ - if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer) { + if (!from_cancel && + (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)) { lock_map_acquire(&pwq->wq->lockdep_map); lock_map_release(&pwq->wq->lockdep_map); } rcu_read_unlock(); return true; already_gone: - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); rcu_read_unlock(); return false; } -/** - * flush_work - wait for a work to finish executing the last queueing instance - * @work: the work to flush - * - * Wait until @work has finished execution. @work is guaranteed to be idle - * on return if it hasn't been requeued since flush started. - * - * Return: - * %true if flush_work() waited for the work to finish execution, - * %false if it was already idle. 
- */ -bool flush_work(struct work_struct *work) +static bool __flush_work(struct work_struct *work, bool from_cancel) { struct wq_barrier barr; if (WARN_ON(!wq_online)) return false; - lock_map_acquire(&work->lockdep_map); - lock_map_release(&work->lockdep_map); + if (WARN_ON(!work->func)) + return false; - if (start_flush_work(work, &barr)) { + if (!from_cancel) { + lock_map_acquire(&work->lockdep_map); + lock_map_release(&work->lockdep_map); + } + + if (start_flush_work(work, &barr, from_cancel)) { wait_for_completion(&barr.done); destroy_work_on_stack(&barr.work); return true; @@ -2940,6 +2905,22 @@ bool flush_work(struct work_struct *work) return false; } } + +/** + * flush_work - wait for a work to finish executing the last queueing instance + * @work: the work to flush + * + * Wait until @work has finished execution. @work is guaranteed to be idle + * on return if it hasn't been requeued since flush started. + * + * Return: + * %true if flush_work() waited for the work to finish execution, + * %false if it was already idle. + */ +bool flush_work(struct work_struct *work) +{ + return __flush_work(work, false); +} EXPORT_SYMBOL_GPL(flush_work); struct cwt_wait { @@ -2997,14 +2978,14 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) /* tell other tasks trying to grab @work to back off */ mark_work_canceling(work); - local_unlock_irqrestore(pendingb_lock, flags); + local_irq_restore(flags); /* * This allows canceling during early boot. We know that @work * isn't executing. */ if (wq_online) - flush_work(work); + __flush_work(work, true); clear_work_data(work); @@ -3058,10 +3039,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); */ bool flush_delayed_work(struct delayed_work *dwork) { - local_lock_irq(pendingb_lock); + local_irq_disable(); if (del_timer_sync(&dwork->timer)) __queue_work(dwork->cpu, dwork->wq, &dwork->work); - local_unlock_irq(pendingb_lock); + local_irq_enable(); return flush_work(&dwork->work); } EXPORT_SYMBOL(flush_delayed_work); @@ -3079,18 +3060,10 @@ static bool __cancel_work(struct work_struct *work, bool is_dwork) return false; set_work_pool_and_clear_pending(work, get_work_pool_id(work)); - local_unlock_irqrestore(pendingb_lock, flags); + local_irq_restore(flags); return ret; } -/* - * See cancel_delayed_work() - */ -bool cancel_work(struct work_struct *work) -{ - return __cancel_work(work, false); -} - /** * cancel_delayed_work - cancel a delayed work * @dwork: delayed_work to cancel @@ -3197,7 +3170,7 @@ EXPORT_SYMBOL_GPL(execute_in_process_context); * * Undo alloc_workqueue_attrs(). */ -void free_workqueue_attrs(struct workqueue_attrs *attrs) +static void free_workqueue_attrs(struct workqueue_attrs *attrs) { if (attrs) { free_cpumask_var(attrs->cpumask); @@ -3207,21 +3180,20 @@ void free_workqueue_attrs(struct workqueue_attrs *attrs) /** * alloc_workqueue_attrs - allocate a workqueue_attrs - * @gfp_mask: allocation mask to use * * Allocate a new workqueue_attrs, initialize with default settings and * return it. * * Return: The allocated new workqueue_attr on success. %NULL on failure. 
*/ -struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask) +static struct workqueue_attrs *alloc_workqueue_attrs(void) { struct workqueue_attrs *attrs; - attrs = kzalloc(sizeof(*attrs), gfp_mask); + attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); if (!attrs) goto fail; - if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask)) + if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL)) goto fail; cpumask_copy(attrs->cpumask, cpu_possible_mask); @@ -3278,7 +3250,7 @@ static bool wqattrs_equal(const struct workqueue_attrs *a, */ static int init_worker_pool(struct worker_pool *pool) { - spin_lock_init(&pool->lock); + raw_spin_lock_init(&pool->lock); pool->id = -1; pool->cpu = -1; pool->node = NUMA_NO_NODE; @@ -3294,7 +3266,6 @@ static int init_worker_pool(struct worker_pool *pool) setup_timer(&pool->mayday_timer, pool_mayday_timeout, (unsigned long)pool); - mutex_init(&pool->attach_mutex); INIT_LIST_HEAD(&pool->workers); ida_init(&pool->worker_ida); @@ -3302,7 +3273,7 @@ static int init_worker_pool(struct worker_pool *pool) pool->refcnt = 1; /* shouldn't fail above this point */ - pool->attrs = alloc_workqueue_attrs(GFP_KERNEL); + pool->attrs = alloc_workqueue_attrs(); if (!pool->attrs) return -ENOMEM; return 0; @@ -3367,20 +3338,20 @@ static void put_unbound_pool(struct worker_pool *pool) * @pool's workers from blocking on attach_mutex. We're the last * manager and @pool gets freed with the flag set. */ - spin_lock_irq(&pool->lock); - wait_event_lock_irq(wq_manager_wait, + raw_spin_lock_irq(&pool->lock); + swait_event_lock_irq(wq_manager_wait, !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock); pool->flags |= POOL_MANAGER_ACTIVE; while ((worker = first_idle_worker(pool))) destroy_worker(worker); WARN_ON(pool->nr_workers || pool->nr_idle); - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); - mutex_lock(&pool->attach_mutex); + mutex_lock(&wq_pool_attach_mutex); if (!list_empty(&pool->workers)) pool->detach_completion = &detach_completion; - mutex_unlock(&pool->attach_mutex); + mutex_unlock(&wq_pool_attach_mutex); if (pool->detach_completion) wait_for_completion(pool->detach_completion); @@ -3535,7 +3506,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) return; /* this function can be called during early boot w/ irq disabled */ - spin_lock_irqsave(&pwq->pool->lock, flags); + raw_spin_lock_irqsave(&pwq->pool->lock, flags); /* * During [un]freezing, the caller is responsible for ensuring that @@ -3565,7 +3536,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) pwq->max_active = 0; } - spin_unlock_irqrestore(&pwq->pool->lock, flags); + raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); } /* initialize newly alloced @pwq which is associated with @wq and @pool */ @@ -3735,12 +3706,11 @@ apply_wqattrs_prepare(struct workqueue_struct *wq, int node; lockdep_assert_held(&wq_pool_mutex); - ctx = kzalloc(sizeof(*ctx) + nr_node_ids * sizeof(ctx->pwq_tbl[0]), GFP_KERNEL); - new_attrs = alloc_workqueue_attrs(GFP_KERNEL); - tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL); + new_attrs = alloc_workqueue_attrs(); + tmp_attrs = alloc_workqueue_attrs(); if (!ctx || !new_attrs || !tmp_attrs) goto out_free; @@ -3876,7 +3846,7 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq, * * Return: 0 on success and -errno on failure. 
*/ -int apply_workqueue_attrs(struct workqueue_struct *wq, +static int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs) { int ret; @@ -3964,9 +3934,9 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, use_dfl_pwq: mutex_lock(&wq->mutex); - spin_lock_irq(&wq->dfl_pwq->pool->lock); + raw_spin_lock_irq(&wq->dfl_pwq->pool->lock); get_pwq(wq->dfl_pwq); - spin_unlock_irq(&wq->dfl_pwq->pool->lock); + raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock); old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq); out_unlock: mutex_unlock(&wq->mutex); @@ -4020,6 +3990,37 @@ static int wq_clamp_max_active(int max_active, unsigned int flags, return clamp_val(max_active, 1, lim); } +/* + * Workqueues which may be used during memory reclaim should have a rescuer + * to guarantee forward progress. + */ +static int init_rescuer(struct workqueue_struct *wq) +{ + struct worker *rescuer; + int ret; + + if (!(wq->flags & WQ_MEM_RECLAIM)) + return 0; + + rescuer = alloc_worker(NUMA_NO_NODE); + if (!rescuer) + return -ENOMEM; + + rescuer->rescue_wq = wq; + rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name); + ret = PTR_ERR_OR_ZERO(rescuer->task); + if (ret) { + kfree(rescuer); + return ret; + } + + wq->rescuer = rescuer; + kthread_bind_mask(rescuer->task, cpu_possible_mask); + wake_up_process(rescuer->task); + + return 0; +} + struct workqueue_struct *__alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, @@ -4054,7 +4055,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, return NULL; if (flags & WQ_UNBOUND) { - wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL); + wq->unbound_attrs = alloc_workqueue_attrs(); if (!wq->unbound_attrs) goto err_free_wq; } @@ -4082,29 +4083,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, if (alloc_and_link_pwqs(wq) < 0) goto err_free_wq; - /* - * Workqueues which may be used during memory reclaim should - * have a rescuer to guarantee forward progress. - */ - if (flags & WQ_MEM_RECLAIM) { - struct worker *rescuer; - - rescuer = alloc_worker(NUMA_NO_NODE); - if (!rescuer) - goto err_destroy; - - rescuer->rescue_wq = wq; - rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", - wq->name); - if (IS_ERR(rescuer->task)) { - kfree(rescuer); - goto err_destroy; - } - - wq->rescuer = rescuer; - kthread_bind_mask(rescuer->task, cpu_possible_mask); - wake_up_process(rescuer->task); - } + if (wq_online && init_rescuer(wq) < 0) + goto err_destroy; if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq)) goto err_destroy; @@ -4162,9 +4142,9 @@ void destroy_workqueue(struct workqueue_struct *wq) struct worker *rescuer = wq->rescuer; /* this prevents new queueing */ - spin_lock_irq(&wq_mayday_lock); + raw_spin_lock_irq(&wq_mayday_lock); wq->rescuer = NULL; - spin_unlock_irq(&wq_mayday_lock); + raw_spin_unlock_irq(&wq_mayday_lock); /* rescuer will empty maydays list before exiting */ kthread_stop(rescuer->task); @@ -4359,10 +4339,10 @@ unsigned int work_busy(struct work_struct *work) rcu_read_lock(); pool = get_work_pool(work); if (pool) { - spin_lock_irqsave(&pool->lock, flags); + raw_spin_lock_irqsave(&pool->lock, flags); if (find_worker_executing_work(pool, work)) ret |= WORK_BUSY_RUNNING; - spin_unlock_irqrestore(&pool->lock, flags); + raw_spin_unlock_irqrestore(&pool->lock, flags); } rcu_read_unlock(); @@ -4389,9 +4369,9 @@ void set_worker_desc(const char *fmt, ...) 
va_start(args, fmt); vsnprintf(worker->desc, sizeof(worker->desc), fmt, args); va_end(args); - worker->desc_valid = true; } } +EXPORT_SYMBOL_GPL(set_worker_desc); /** * print_worker_info - print out worker information and description @@ -4413,7 +4393,6 @@ void print_worker_info(const char *log_lvl, struct task_struct *task) char desc[WORKER_DESC_LEN] = { }; struct pool_workqueue *pwq = NULL; struct workqueue_struct *wq = NULL; - bool desc_valid = false; struct worker *worker; if (!(task->flags & PF_WQ_WORKER)) @@ -4426,22 +4405,18 @@ void print_worker_info(const char *log_lvl, struct task_struct *task) worker = kthread_probe_data(task); /* - * Carefully copy the associated workqueue's workfn and name. Keep - * the original last '\0' in case the original contains garbage. + * Carefully copy the associated workqueue's workfn, name and desc. + * Keep the original last '\0' in case the original is garbage. */ probe_kernel_read(&fn, &worker->current_func, sizeof(fn)); probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq)); probe_kernel_read(&wq, &pwq->wq, sizeof(wq)); probe_kernel_read(name, wq->name, sizeof(name) - 1); - - /* copy worker description */ - probe_kernel_read(&desc_valid, &worker->desc_valid, sizeof(desc_valid)); - if (desc_valid) - probe_kernel_read(desc, worker->desc, sizeof(desc) - 1); + probe_kernel_read(desc, worker->desc, sizeof(desc) - 1); if (fn || name[0] || desc[0]) { printk("%sWorkqueue: %s %pf", log_lvl, name, fn); - if (desc[0]) + if (strcmp(name, desc)) pr_cont(" (%s)", desc); pr_cont("\n"); } @@ -4574,10 +4549,10 @@ void show_workqueue_state(void) pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags); for_each_pwq(pwq, wq) { - spin_lock_irqsave(&pwq->pool->lock, flags); + raw_spin_lock_irqsave(&pwq->pool->lock, flags); if (pwq->nr_active || !list_empty(&pwq->delayed_works)) show_pwq(pwq); - spin_unlock_irqrestore(&pwq->pool->lock, flags); + raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); /* * We could be printing a lot from atomic context, e.g. * sysrq-t -> show_workqueue_state(). Avoid triggering @@ -4591,7 +4566,7 @@ void show_workqueue_state(void) struct worker *worker; bool first = true; - spin_lock_irqsave(&pool->lock, flags); + raw_spin_lock_irqsave(&pool->lock, flags); if (pool->nr_workers == pool->nr_idle) goto next_pool; @@ -4610,7 +4585,7 @@ void show_workqueue_state(void) } pr_cont("\n"); next_pool: - spin_unlock_irqrestore(&pool->lock, flags); + raw_spin_unlock_irqrestore(&pool->lock, flags); /* * We could be printing a lot from atomic context, e.g. * sysrq-t -> show_workqueue_state(). Avoid triggering @@ -4622,6 +4597,47 @@ void show_workqueue_state(void) rcu_read_unlock(); } +/* used to show worker information through /proc/PID/{comm,stat,status} */ +void wq_worker_comm(char *buf, size_t size, struct task_struct *task) +{ + int off; + + /* always show the actual comm */ + off = strscpy(buf, task->comm, size); + if (off < 0) + return; + + /* stabilize PF_WQ_WORKER and worker pool association */ + mutex_lock(&wq_pool_attach_mutex); + + if (task->flags & PF_WQ_WORKER) { + struct worker *worker = kthread_data(task); + struct worker_pool *pool = worker->pool; + + if (pool) { + raw_spin_lock_irq(&pool->lock); + /* + * ->desc tracks information (wq name or + * set_worker_desc()) for the latest execution. If + * current, prepend '+', otherwise '-'. 
+ */ + if (worker->desc[0] != '\0') { + if (worker->current_work) + scnprintf(buf + off, size - off, "+%s", + worker->desc); + else + scnprintf(buf + off, size - off, "-%s", + worker->desc); + } + raw_spin_unlock_irq(&pool->lock); + } + } + + mutex_unlock(&wq_pool_attach_mutex); +} + +#ifdef CONFIG_SMP + /* * CPU hotplug. * @@ -4637,15 +4653,14 @@ void show_workqueue_state(void) * cpu comes back online. */ -static void wq_unbind_fn(struct work_struct *work) +static void unbind_workers(int cpu) { - int cpu = smp_processor_id(); struct worker_pool *pool; struct worker *worker; for_each_cpu_worker_pool(pool, cpu) { - mutex_lock(&pool->attach_mutex); - spin_lock_irq(&pool->lock); + mutex_lock(&wq_pool_attach_mutex); + raw_spin_lock_irq(&pool->lock); /* * We've blocked all attach/detach operations. Make all workers @@ -4659,8 +4674,8 @@ static void wq_unbind_fn(struct work_struct *work) pool->flags |= POOL_DISASSOCIATED; - spin_unlock_irq(&pool->lock); - mutex_unlock(&pool->attach_mutex); + raw_spin_unlock_irq(&pool->lock); + mutex_unlock(&wq_pool_attach_mutex); /* * Call schedule() so that we cross rq->lock and thus can @@ -4685,9 +4700,9 @@ static void wq_unbind_fn(struct work_struct *work) * worker blocking could lead to lengthy stalls. Kick off * unbound chain execution of currently pending work items. */ - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); wake_up_worker(pool); - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); } } @@ -4701,7 +4716,7 @@ static void rebind_workers(struct worker_pool *pool) { struct worker *worker; - lockdep_assert_held(&pool->attach_mutex); + lockdep_assert_held(&wq_pool_attach_mutex); /* * Restore CPU affinity of all workers. As all idle workers should @@ -4714,17 +4729,7 @@ static void rebind_workers(struct worker_pool *pool) WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask) < 0); - spin_lock_irq(&pool->lock); - - /* - * XXX: CPU hotplug notifiers are weird and can call DOWN_FAILED - * w/o preceding DOWN_PREPARE. Work around it. CPU hotplug is - * being reworked and this can go away in time. - */ - if (!(pool->flags & POOL_DISASSOCIATED)) { - spin_unlock_irq(&pool->lock); - return; - } + raw_spin_lock_irq(&pool->lock); pool->flags &= ~POOL_DISASSOCIATED; @@ -4751,7 +4756,7 @@ static void rebind_workers(struct worker_pool *pool) * concurrency management. Note that when or whether * @worker clears REBOUND doesn't affect correctness. * - * ACCESS_ONCE() is necessary because @worker->flags may be + * WRITE_ONCE() is necessary because @worker->flags may be * tested without holding any lock in * wq_worker_waking_up(). Without it, NOT_RUNNING test may * fail incorrectly leading to premature concurrency @@ -4760,10 +4765,10 @@ static void rebind_workers(struct worker_pool *pool) WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND)); worker_flags |= WORKER_REBOUND; worker_flags &= ~WORKER_UNBOUND; - ACCESS_ONCE(worker->flags) = worker_flags; + WRITE_ONCE(worker->flags, worker_flags); } - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); } /** @@ -4781,7 +4786,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu) static cpumask_t cpumask; struct worker *worker; - lockdep_assert_held(&pool->attach_mutex); + lockdep_assert_held(&wq_pool_attach_mutex); /* is @cpu allowed for @pool? 
*/ if (!cpumask_test_cpu(cpu, pool->attrs->cpumask)) @@ -4816,14 +4821,14 @@ int workqueue_online_cpu(unsigned int cpu) mutex_lock(&wq_pool_mutex); for_each_pool(pool, pi) { - mutex_lock(&pool->attach_mutex); + mutex_lock(&wq_pool_attach_mutex); if (pool->cpu == cpu) rebind_workers(pool); else if (pool->cpu < 0) restore_unbound_workers_cpumask(pool, cpu); - mutex_unlock(&pool->attach_mutex); + mutex_unlock(&wq_pool_attach_mutex); } /* update NUMA affinity of unbound workqueues */ @@ -4836,12 +4841,13 @@ int workqueue_online_cpu(unsigned int cpu) int workqueue_offline_cpu(unsigned int cpu) { - struct work_struct unbind_work; struct workqueue_struct *wq; /* unbinding per-cpu workers should happen on the local CPU */ - INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn); - queue_work_on(cpu, system_highpri_wq, &unbind_work); + if (WARN_ON(cpu != smp_processor_id())) + return -1; + + unbind_workers(cpu); /* update NUMA affinity of unbound workqueues */ mutex_lock(&wq_pool_mutex); @@ -4849,14 +4855,9 @@ int workqueue_offline_cpu(unsigned int cpu) wq_update_unbound_numa(wq, cpu, false); mutex_unlock(&wq_pool_mutex); - /* wait for per-cpu unbinding to finish */ - flush_work(&unbind_work); - destroy_work_on_stack(&unbind_work); return 0; } -#ifdef CONFIG_SMP - struct work_for_cpu { struct work_struct work; long (*fn)(void *); @@ -5081,12 +5082,22 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) int ret = -EINVAL; cpumask_var_t saved_cpumask; - if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) - return -ENOMEM; - + /* + * Not excluding isolated cpus on purpose. + * If the user wishes to include them, we allow that. + */ cpumask_and(cpumask, cpumask, cpu_possible_mask); if (!cpumask_empty(cpumask)) { apply_wqattrs_lock(); + if (cpumask_equal(cpumask, wq_unbound_cpumask)) { + ret = 0; + goto out_unlock; + } + + if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) { + ret = -ENOMEM; + goto out_unlock; + } /* save the old wq_unbound_cpumask. */ cpumask_copy(saved_cpumask, wq_unbound_cpumask); @@ -5099,10 +5110,11 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) if (ret < 0) cpumask_copy(wq_unbound_cpumask, saved_cpumask); + free_cpumask_var(saved_cpumask); +out_unlock: apply_wqattrs_unlock(); } - free_cpumask_var(saved_cpumask); return ret; } @@ -5117,9 +5129,10 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) * * Unbound workqueues have the following extra attributes. * - * id RO int : the associated pool ID + * pool_ids RO int : the associated pool IDs for each node * nice RW int : nice value of the workers * cpumask RW mask : bitmask of allowed CPUs for the workers + * numa RW bool : whether enable NUMA affinity */ struct wq_device { struct workqueue_struct *wq; @@ -5214,7 +5227,7 @@ static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq) lockdep_assert_held(&wq_pool_mutex); - attrs = alloc_workqueue_attrs(GFP_KERNEL); + attrs = alloc_workqueue_attrs(); if (!attrs) return NULL; @@ -5645,7 +5658,7 @@ static void __init wq_numa_init(void) return; } - wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL); + wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(); BUG_ON(!wq_update_unbound_numa_attrs_buf); /* @@ -5653,7 +5666,7 @@ static void __init wq_numa_init(void) * available. Build one from cpu_to_node() which should have been * fully initialized by now. 
*/ - tbl = kzalloc(nr_node_ids * sizeof(tbl[0]), GFP_KERNEL); + tbl = kcalloc(nr_node_ids, sizeof(tbl[0]), GFP_KERNEL); BUG_ON(!tbl); for_each_node(node) @@ -5719,7 +5732,7 @@ int __init workqueue_init_early(void) for (i = 0; i < NR_STD_WORKER_POOLS; i++) { struct workqueue_attrs *attrs; - BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL))); + BUG_ON(!(attrs = alloc_workqueue_attrs())); attrs->nice = std_nice[i]; unbound_std_wq_attrs[i] = attrs; @@ -5728,7 +5741,7 @@ int __init workqueue_init_early(void) * guaranteed by max_active which is enforced by pwqs. * Turn off NUMA so that dfl_pwq is used for all nodes. */ - BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL))); + BUG_ON(!(attrs = alloc_workqueue_attrs())); attrs->nice = std_nice[i]; attrs->no_numa = true; ordered_wq_attrs[i] = attrs; @@ -5775,6 +5788,8 @@ int __init workqueue_init(void) * archs such as power and arm64. As per-cpu pools created * previously could be missing node hint and unbound pools NUMA * affinity, fix them up. + * + * Also, while iterating workqueues, create rescuers if requested. */ wq_numa_init(); @@ -5786,8 +5801,12 @@ int __init workqueue_init(void) } } - list_for_each_entry(wq, &workqueues, list) + list_for_each_entry(wq, &workqueues, list) { wq_update_unbound_numa(wq, smp_processor_id(), true); + WARN(init_rescuer(wq), + "workqueue: failed to create early rescuer for %s", + wq->name); + } mutex_unlock(&wq_pool_mutex); -- 2.37.3