Add a facility to the workqueue subsystem whereby a work function can
register an atomic_t such that the work function dispatcher will decrement
the atomic after the work function has returned and then call
wake_up_atomic_t() on it if it reaches 0.  This is analogous to
complete_and_exit() for kernel threads and is used to avoid a race between
a work item notifying that it is about to finish and the .text segment of
the module containing the work function being discarded.

The way this is used is that the work function calls:

	dec_after_work(atomic_t *counter);

to register the counter; then, after the work function returns,
process_one_work() decrements the counter, wakes it if it reached 0 and
clears the registration.

The reason I've used an atomic_t rather than a completion is that (1) it
takes up less space and (2) it can monitor multiple objects.

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Tejun Heo <tj@xxxxxxxxxx>
cc: Lai Jiangshan <jiangshanlai@xxxxxxxxx>
---
 include/linux/workqueue.h   |  1 +
 kernel/workqueue.c          | 25 +++++++++++++++++++++++++
 kernel/workqueue_internal.h |  1 +
 3 files changed, 27 insertions(+)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index db6dc9dc0482..ceaed1387e9b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -451,6 +451,7 @@ extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
 
 extern void flush_workqueue(struct workqueue_struct *wq);
 extern void drain_workqueue(struct workqueue_struct *wq);
+extern void dec_after_work(atomic_t *counter);
 
 extern int schedule_on_each_cpu(work_func_t func);
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ca937b0c3a96..2936ad0ab293 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2112,6 +2112,12 @@ __acquires(&pool->lock)
 		dump_stack();
 	}
 
+	if (worker->dec_after) {
+		if (atomic_dec_and_test(worker->dec_after))
+			wake_up_atomic_t(worker->dec_after);
+		worker->dec_after = NULL;
+	}
+
 	/*
 	 * The following prevents a kworker from hogging CPU on !PREEMPT
 	 * kernels, where a requeueing work item waiting for something to
@@ -3087,6 +3093,25 @@ int schedule_on_each_cpu(work_func_t func)
 }
 
 /**
+ * dec_after_work - Register counter to dec and wake after work func returns
+ * @counter: The counter to decrement and wake
+ *
+ * Register an atomic counter to be decremented after a work function returns
+ * to the core.  The counter is 'woken' if it is decremented to 0.  This
+ * allows synchronisation to be effected by one or more work functions in a
+ * module without leaving a window in which the work function code can be
+ * unloaded.
+ */
+void dec_after_work(atomic_t *counter)
+{
+	struct worker *worker = current_wq_worker();
+
+	BUG_ON(!worker);
+	BUG_ON(worker->dec_after);
+	worker->dec_after = counter;
+}
+EXPORT_SYMBOL(dec_after_work);
+
+/**
  * execute_in_process_context - reliably execute the routine with user context
  * @fn: the function to execute
  * @ew: guaranteed storage for the execute work structure (must
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 8635417c587b..94ea1ca9b01f 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -28,6 +28,7 @@ struct worker {
 
 	struct work_struct	*current_work;	/* L: work being processed */
 	work_func_t		current_func;	/* L: current_work's fn */
+	atomic_t		*dec_after;	/* Decrement after func returns */
 	struct pool_workqueue	*current_pwq;	/* L: current_work's pwq */
 	bool			desc_valid;	/* ->desc is valid */
 	struct list_head	scheduled;	/* L: scheduled works */
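
For illustration only, not part of the patch: a minimal sketch of how a
module might pair dec_after_work() with wait_on_atomic_t() on its unload
path.  All of the foo_* names are hypothetical, and this assumes the
wait_on_atomic_t() API as it exists at the time of writing (the action
callback just schedules).

#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/workqueue.h>

/* One counter monitoring all outstanding requests. */
static atomic_t foo_outstanding = ATOMIC_INIT(0);

struct foo_req {
	struct work_struct work;
};

static void foo_work_func(struct work_struct *work)
{
	struct foo_req *req = container_of(work, struct foo_req, work);

	/* ... process the request ... */
	kfree(req);

	/* Register the counter with the dispatcher.  process_one_work()
	 * decrements it, and wakes it if it hits 0, only after this
	 * function has returned, i.e. once we are no longer executing
	 * module text.
	 */
	dec_after_work(&foo_outstanding);
}

static int foo_queue_req(void)
{
	struct foo_req *req = kzalloc(sizeof(*req), GFP_KERNEL);

	if (!req)
		return -ENOMEM;
	INIT_WORK(&req->work, foo_work_func);
	atomic_inc(&foo_outstanding);	/* One count per outstanding item */
	schedule_work(&req->work);
	return 0;
}

/* Action callback for wait_on_atomic_t(). */
static int foo_wait_atomic_t(atomic_t *counter)
{
	schedule();
	return 0;
}

static int __init foo_init(void)
{
	return foo_queue_req();
}
module_init(foo_init);

static void __exit foo_exit(void)
{
	/* Returns once every work function has returned to the core and
	 * the counter has been decremented to 0, leaving no window in
	 * which this module's text is still being executed.
	 */
	wait_on_atomic_t(&foo_outstanding, foo_wait_atomic_t,
			 TASK_UNINTERRUPTIBLE);
}
module_exit(foo_exit);

MODULE_LICENSE("GPL");

The per-request allocation is where the "monitor multiple objects" point
above comes in: any number of work items can share the one counter, where a
struct completion would only signal a single event.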