On 05/20, Andrew Morton wrote: > > --- a/kernel/kthread.c~kthread-fix-kthread_mod_delayed_work-vs-kthread_cancel_delayed_work_sync-race > +++ a/kernel/kthread.c > @@ -1181,6 +1181,19 @@ bool kthread_mod_delayed_work(struct kth > goto out; > > ret = __kthread_cancel_work(work, true, &flags); > + > + /* > + * Canceling could run in parallel from kthread_cancel_delayed_work_sync > + * and change work's canceling count as the spinlock is released and regain > + * in __kthread_cancel_work so we need to check the count again. Otherwise, > + * we might incorrectly queue the dwork and further cause > + * cancel_delayed_work_sync thread waiting for flush dwork endlessly. > + */ > + if (work->canceling) { > + ret = false; > + goto out; > + } > + > fast_queue: > __kthread_queue_delayed_work(worker, dwork, delay); Never looked at this code before, can't review... but note that another caller of __kthread_queue_delayed_work() needs to check work->canceling too. So perhaps we should simply add queuing_blocked() into __kthread_queue_delayed_work() ? Something like below, uncompiled/untested, most probably incorrect. Either way, this comment * Return: %true if @dwork was pending and its timer was modified, * %false otherwise. above kthread_mod_delayed_work looks obviously wrong. Currently it returns true if this work was pending. With your patch it returns true if it was pending and not canceling. With the patch below it returns true if the work was (re)queued successfully, and this makes more sense to me. But again, I can easily misread this code. In any case, even if my patch is correct, I won't insist, your fix is much simpler. Oleg. --- x/kernel/kthread.c +++ x/kernel/kthread.c @@ -977,7 +977,7 @@ void kthread_delayed_work_timer_fn(struc } EXPORT_SYMBOL(kthread_delayed_work_timer_fn); -static void __kthread_queue_delayed_work(struct kthread_worker *worker, +static bool __kthread_queue_delayed_work(struct kthread_worker *worker, struct kthread_delayed_work *dwork, unsigned long delay) { @@ -987,6 +987,9 @@ static void __kthread_queue_delayed_work WARN_ON_FUNCTION_MISMATCH(timer->function, kthread_delayed_work_timer_fn); + if (queuing_blocked(worker, work)) + return false; + /* * If @delay is 0, queue @dwork->work immediately. This is for * both optimization and correctness. The earliest @timer can @@ -995,7 +998,7 @@ static void __kthread_queue_delayed_work */ if (!delay) { kthread_insert_work(worker, work, &worker->work_list); - return; + return true; } /* Be paranoid and try to detect possible races already now. */ @@ -1005,6 +1008,7 @@ static void __kthread_queue_delayed_work work->worker = worker; timer->expires = jiffies + delay; add_timer(timer); + return true; } /** @@ -1028,16 +1032,12 @@ bool kthread_queue_delayed_work(struct k { struct kthread_work *work = &dwork->work; unsigned long flags; - bool ret = false; + bool ret; raw_spin_lock_irqsave(&worker->lock, flags); - - if (!queuing_blocked(worker, work)) { - __kthread_queue_delayed_work(worker, dwork, delay); - ret = true; - } - + ret = __kthread_queue_delayed_work(worker, dwork, delay); raw_spin_unlock_irqrestore(&worker->lock, flags); + return ret; } EXPORT_SYMBOL_GPL(kthread_queue_delayed_work); @@ -1180,9 +1180,9 @@ bool kthread_mod_delayed_work(struct kth if (work->canceling) goto out; - ret = __kthread_cancel_work(work, true, &flags); + __kthread_cancel_work(work, true, &flags); fast_queue: - __kthread_queue_delayed_work(worker, dwork, delay); + ret = __kthread_queue_delayed_work(worker, dwork, delay); out: raw_spin_unlock_irqrestore(&worker->lock, flags); return ret;