The following commit has been merged into the sched/core branch of tip:

Commit-ID:     75b6499024a6c1a4ef0288f280534a5c54269076
Gitweb:        https://git.kernel.org/tip/75b6499024a6c1a4ef0288f280534a5c54269076
Author:        Valentin Schneider <vschneid@xxxxxxxxxx>
AuthorDate:    Thu, 29 Aug 2024 15:53:53 +02:00
Committer:     Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Tue, 03 Sep 2024 15:26:30 +02:00

sched/fair: Properly deactivate sched_delayed task upon class change

__sched_setscheduler() goes through an enqueue/dequeue cycle like so:

  flags := DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
  prev_class->dequeue_task(rq, p, flags);
  new_class->enqueue_task(rq, p, flags);

when prev_class := fair_sched_class, this is followed by:

  dequeue_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SLEEP);

the idea being that since the task has switched classes, we need to drop
the sched_delayed logic and have that task be deactivated per its previous
dequeue_task(..., DEQUEUE_SLEEP).

Unfortunately, this leaves the task on_rq. This is missing the tail end of
dequeue_entities() that issues __block_task(), which __sched_setscheduler()
won't have done due to not using DEQUEUE_DELAYED - not that it should, as
it is pretty much a fair_sched_class specific thing.

Make switched_from_fair() properly deactivate sched_delayed tasks upon
class changes via __block_task(), as if a dequeue_task(..., DEQUEUE_DELAYED)
had been issued.

Fixes: 2e0199df252a ("sched/fair: Prepare exit/cleanup paths for delayed_dequeue")
Reported-by: "Paul E. McKenney" <paulmck@xxxxxxxxxx>
Reported-by: Chen Yu <yu.c.chen@xxxxxxxxx>
Signed-off-by: Valentin Schneider <vschneid@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Link: https://lkml.kernel.org/r/20240829135353.1524260-1-vschneid@xxxxxxxxxx
---
 kernel/sched/fair.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fea057b..3a3286d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5456,6 +5456,13 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
 
+static inline void finish_delayed_dequeue_entity(struct sched_entity *se)
+{
+	se->sched_delayed = 0;
+	if (sched_feat(DELAY_ZERO) && se->vlag > 0)
+		se->vlag = 0;
+}
+
 static bool
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
@@ -5531,11 +5538,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
 		update_min_vruntime(cfs_rq);
 
-	if (flags & DEQUEUE_DELAYED) {
-		se->sched_delayed = 0;
-		if (sched_feat(DELAY_ZERO) && se->vlag > 0)
-			se->vlag = 0;
-	}
+	if (flags & DEQUEUE_DELAYED)
+		finish_delayed_dequeue_entity(se);
 
 	if (cfs_rq->nr_running == 0)
 		update_idle_cfs_rq_clock_pelt(cfs_rq);
@@ -13107,11 +13111,16 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
 	 * and we cannot use DEQUEUE_DELAYED.
 	 */
 	if (p->se.sched_delayed) {
+		/* First, dequeue it from its new class' structures */
 		dequeue_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SLEEP);
-		p->se.sched_delayed = 0;
+		/*
+		 * Now, clean up the fair_sched_class side of things
+		 * related to sched_delayed being true and that wasn't done
+		 * due to the generic dequeue not using DEQUEUE_DELAYED.
+		 */
+		finish_delayed_dequeue_entity(&p->se);
 		p->se.rel_deadline = 0;
-		if (sched_feat(DELAY_ZERO) && p->se.vlag > 0)
-			p->se.vlag = 0;
+		__block_task(rq, p);
 	}
 }
 