Clarify and fix task_try_func(). Move the smp_rmb() up to avoid re-loading p->on_rq in the false case, but add a p->on_rq reload after acquiring rq->lock, after all, it could have gotten dequeued while waiting for the lock. Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> --- kernel/sched/core.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4005,7 +4005,7 @@ try_to_wake_up(struct task_struct *p, un * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in * __schedule(). See the comment for smp_mb__after_spinlock(). * - * A similar smb_rmb() lives in try_invoke_on_locked_down_task(). + * A similar smb_rmb() lives in task_try_func(). */ smp_rmb(); if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) @@ -4124,25 +4124,48 @@ try_to_wake_up(struct task_struct *p, un */ int task_try_func(struct task_struct *p, task_try_f func, void *arg) { + unsigned int state; struct rq_flags rf; - int ret = -EAGAIN; /* raced, try again later */ struct rq *rq; + int ret = -EAGAIN; /* raced, try again later */ raw_spin_lock_irqsave(&p->pi_lock, rf.flags); + + state = READ_ONCE(p->__state); + + /* + * Ensure we load p->on_rq after p->__state, otherwise it would be + * possible to, falsely, observe p->on_rq == 0. + * + * See try_to_wake_up() for a longer comment. + */ + smp_rmb(); + if (p->on_rq) { rq = __task_rq_lock(p, &rf); - if (task_rq(p) == rq) + + /* re-check p->on_rq now that we hold rq->lock */ + if (p->on_rq && task_rq(p) == rq) ret = func(p, arg); + rq_unlock(rq, &rf); + } else { - switch (READ_ONCE(p->__state)) { + + switch (state) { case TASK_RUNNING: case TASK_WAKING: + /* + * We raced against wakeup, try again later. + */ break; + default: - smp_rmb(); // See smp_rmb() comment in try_to_wake_up(). - if (!p->on_rq) - ret = func(p, arg); + /* + * Since we hold ->pi_lock, we serialize against + * try_to_wake_up() and any blocked state must remain. + */ + ret = func(p, arg); } } raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);