On Thu, Feb 20, 2020 at 06:22:02PM +0100, Oleg Nesterov wrote: > On 02/20, Peter Zijlstra wrote: > > + work = READ_ONCE(task->task_works); > > + if (!work) { > > + if (!(task->flags & PF_EXITING)) > > + return; > > + > > + /* > > + * work->func() can do task_work_add(), do not set > > + * work_exited unless the list is empty. > > + */ > > + if (try_cmpxchg(&task->task_works, &work, &work_exited)) > > + return; > > + } > > + > > + work = xchg(&task->task_works, NULL); > > > > - if (!work) > > - break; > > You can't remove the "if (!work)" check, cancel_task_work() can remove > a single callback between READ_ONCE() and xchg(). Argh! > Oleg. > > --- a/kernel/task_work.c > +++ b/kernel/task_work.c > @@ -27,14 +27,11 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ > int > task_work_add(struct task_struct *task, struct callback_head *work, bool notify) > { > - struct callback_head *head; > - > + work->next = READ_ONCE(task->task_works); > do { > - head = READ_ONCE(task->task_works); > - if (unlikely(head == &work_exited)) > + if (unlikely(work->next == &work_exited)) > return -ESRCH; > - work->next = head; > - } while (cmpxchg(&task->task_works, head, work) != head); > + } while (!try_cmpxchg(&task->task_works, &work->next, work)); > > if (notify) > set_notify_resume(task); Cute, that should work. > @@ -68,10 +65,10 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) > * we raced with task_work_run(), *pprev == NULL/exited. > */ > raw_spin_lock_irqsave(&task->pi_lock, flags); > + for (work = READ_ONCE(*pprev); work; ) { > if (work->func != func) > pprev = &work->next; But didn't you loose the READ_ONCE() of *pprev in this branch? > + else if (try_cmpxchg(pprev, &work, work->next)) > break; > } > raw_spin_unlock_irqrestore(&task->pi_lock, flags); > @@ -97,16 +94,16 @@ void task_work_run(void) > * work->func() can do task_work_add(), do not set > * work_exited unless the list is empty. > */ > + work = READ_ONCE(task->task_works); > do { > head = NULL; > if (!work) { > if (task->flags & PF_EXITING) > head = &work_exited; > else > break; > } > + } while (!try_cmpxchg(&task->task_works, &work, head)); > > if (!work) > break; But given that, as you say, cancel() could have gone and stole our head, should we not try and install &work_exiting when PF_EXITING in that case? That is; should we not do continue in that case, instead of break. --- --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -27,14 +27,13 @@ static struct callback_head work_exited; int task_work_add(struct task_struct *task, struct callback_head *work, bool notify) { - struct callback_head *head; + struct callback_head *head = READ_ONCE(tsk->task_works); do { - head = READ_ONCE(task->task_works); if (unlikely(head == &work_exited)) return -ESRCH; work->next = head; - } while (cmpxchg(&task->task_works, head, work) != head); + } while (!try_cmpxchg(&task->task_works, &head, work)) if (notify) set_notify_resume(task); @@ -69,9 +68,12 @@ task_work_cancel(struct task_struct *tas */ raw_spin_lock_irqsave(&task->pi_lock, flags); while ((work = READ_ONCE(*pprev))) { - if (work->func != func) + if (work->func != func) { pprev = &work->next; - else if (cmpxchg(pprev, work, work->next) == work) + continue; + } + + if (try_cmpxchg(pprev, &work, work->next)) break; } raw_spin_unlock_irqrestore(&task->pi_lock, flags); @@ -90,26 +92,26 @@ task_work_cancel(struct task_struct *tas void task_work_run(void) { struct task_struct *task = current; - struct callback_head *work, *head, *next; + struct callback_head *work, *next; for (;;) { - /* - * work->func() can do task_work_add(), do not set - * work_exited unless the list is empty. - */ - do { - head = NULL; - work = READ_ONCE(task->task_works); - if (!work) { - if (task->flags & PF_EXITING) - head = &work_exited; - else - break; - } - } while (cmpxchg(&task->task_works, work, head) != work); + work = READ_ONCE(task->task_works); + if (!work) { + if (!(task->flags & PF_EXITING)) + return; + + /* + * work->func() can do task_work_add(), do not set + * work_exited unless the list is empty. + */ + if (try_cmpxchg(&task->task_works, &work, &work_exited)) + return; + } + + work = xchg(&task->task_works, NULL); + if (!work) + continue; - if (!work) - break; /* * Synchronize with task_work_cancel(). It can not remove * the first entry == work, cmpxchg(task_works) must fail.