The following commit has been merged into the sched/urgent branch of tip: Commit-ID: d68803506ffb4f72cbeaea94a3a745a6faf62bdd Gitweb: https://git.kernel.org/tip/d68803506ffb4f72cbeaea94a3a745a6faf62bdd Author: Mike Galbraith <efault@xxxxxx> AuthorDate: Fri, 08 Nov 2024 01:24:35 +01:00 Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx> CommitterDate: Mon, 11 Nov 2024 11:49:44 +01:00 sched/fair: Dequeue sched_delayed tasks when waking to a busy CPU Phil Auld (Red Hat) reported an fio benchmark regression that was found to be caused by the addition of the DELAY_DEQUEUE feature, suggested that it may be related to wakees losing the ability to migrate, and confirmed that restoring that ability did indeed restore the previous performance. (de-uglified-a-lot-by) Fixes: 152e11f6df29 ("sched/fair: Implement delayed dequeue") Reported-by: Phil Auld <pauld@xxxxxxxxxx> Suggested-by: Phil Auld <pauld@xxxxxxxxxx> Reviewed-by: Phil Auld <pauld@xxxxxxxxxx> Tested-by: Jirka Hladky <jhladky@xxxxxxxxxx> Signed-off-by: Mike Galbraith <efault@xxxxxx> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Link: https://lore.kernel.org/lkml/20241101124715.GA689589@pauld.westford.= kernel/sched/core.c | 48 +++++++++++++++++++++++++++++------------------- kernel/sched/sched.h | 5 +++++ 2 files changed, 34 insertions(+), 19 deletions(-) --- kernel/sched/core.c | 46 ++++++++++++++++++++++++++----------------- kernel/sched/sched.h | 5 +++++- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 719e0ed..b35752f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3734,28 +3734,38 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, */ static int ttwu_runnable(struct task_struct *p, int wake_flags) { - struct rq_flags rf; - struct rq *rq; - int ret = 0; + CLASS(__task_rq_lock, rq_guard)(p); + struct rq *rq = rq_guard.rq; - rq = __task_rq_lock(p, &rf); - if (task_on_rq_queued(p)) { - update_rq_clock(rq); - if 
(p->se.sched_delayed) - enqueue_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_DELAYED); - if (!task_on_cpu(rq, p)) { - /* - * When on_rq && !on_cpu the task is preempted, see if - * it should preempt the task that is current now. - */ - wakeup_preempt(rq, p, wake_flags); + if (!task_on_rq_queued(p)) + return 0; + + update_rq_clock(rq); + if (p->se.sched_delayed) { + int queue_flags = ENQUEUE_DELAYED | ENQUEUE_NOCLOCK; + + /* + * Since sched_delayed means we cannot be current anywhere, + * dequeue it here and have it fall through to the + * select_task_rq() case further along the ttwu() path. + */ + if (rq->nr_running > 1 && p->nr_cpus_allowed > 1) { + dequeue_task(rq, p, DEQUEUE_SLEEP | queue_flags); + return 0; } - ttwu_do_wakeup(p); - ret = 1; + + enqueue_task(rq, p, queue_flags); } - __task_rq_unlock(rq, &rf); + if (!task_on_cpu(rq, p)) { + /* + * When on_rq && !on_cpu the task is preempted, see if + * it should preempt the task that is current now. + */ + wakeup_preempt(rq, p, wake_flags); + } + ttwu_do_wakeup(p); - return ret; + return 1; } #ifdef CONFIG_SMP diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6c54a57..97f7936 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1779,6 +1779,11 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); } +DEFINE_LOCK_GUARD_1(__task_rq_lock, struct task_struct, + _T->rq = __task_rq_lock(_T->lock, &_T->rf), + __task_rq_unlock(_T->rq, &_T->rf), + struct rq *rq; struct rq_flags rf) + DEFINE_LOCK_GUARD_1(task_rq_lock, struct task_struct, _T->rq = task_rq_lock(_T->lock, &_T->rf), task_rq_unlock(_T->rq, _T->lock, &_T->rf),