The following commit has been merged into the sched/core branch of tip:

Commit-ID:     f12e148892ede8d9ee82bcd3e469e6d01fc077ac
Gitweb:        https://git.kernel.org/tip/f12e148892ede8d9ee82bcd3e469e6d01fc077ac
Author:        Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate:    Thu, 23 May 2024 11:26:25 +02:00
Committer:     Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Sat, 17 Aug 2024 11:06:43 +02:00

sched/fair: Prepare pick_next_task() for delayed dequeue

Delayed dequeue's natural end is when it gets picked again. Ensure
pick_next_task() knows what to do with delayed tasks.

Note, this relies on the earlier patch that made pick_next_task()
state invariant -- it will restart the pick on dequeue, because
obviously the just dequeued task is no longer eligible.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Reviewed-by: Valentin Schneider <vschneid@xxxxxxxxxx>
Tested-by: Valentin Schneider <vschneid@xxxxxxxxxx>
Link: https://lkml.kernel.org/r/20240727105029.747330118@xxxxxxxxxxxxx
---
 kernel/sched/fair.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9a84903..a4f1f79 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5473,6 +5473,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
+static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags);
+
 /*
  * Pick the next process, keeping these things in mind, in this order:
  * 1) keep things fair between processes/task groups
@@ -5481,16 +5483,27 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * 4) do not run the "skip" process, if something else is available
  */
 static struct sched_entity *
-pick_next_entity(struct cfs_rq *cfs_rq)
+pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq)
 {
 	/*
 	 * Enabling NEXT_BUDDY will affect latency but not fairness.
 	 */
 	if (sched_feat(NEXT_BUDDY) &&
-	    cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next))
+	    cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next)) {
+		/* ->next will never be delayed */
+		SCHED_WARN_ON(cfs_rq->next->sched_delayed);
 		return cfs_rq->next;
+	}
 
-	return pick_eevdf(cfs_rq);
+	struct sched_entity *se = pick_eevdf(cfs_rq);
+	if (se->sched_delayed) {
+		dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
+		SCHED_WARN_ON(se->sched_delayed);
+		SCHED_WARN_ON(se->on_rq);
+		return NULL;
+	}
+
+	return se;
 }
 
 static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
@@ -8507,7 +8520,9 @@ again:
 		if (unlikely(check_cfs_rq_runtime(cfs_rq)))
 			goto again;
 
-		se = pick_next_entity(cfs_rq);
+		se = pick_next_entity(rq, cfs_rq);
+		if (!se)
+			goto again;
 
 		cfs_rq = group_cfs_rq(se);
 	} while (cfs_rq);
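
For readers following along: below is a minimal, self-contained toy model
(plain C, not kernel code; all names are illustrative stand-ins, not the
kernel's own) of the pick-restart pattern the patch introduces. It shows
the one idea that matters here: when the pick lands on a delayed entity,
the delayed dequeue is completed on the spot and the pick is redone,
because the just-dequeued entity is no longer a valid choice.

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>

	struct toy_entity {
		const char *name;
		bool sched_delayed;	/* kept enqueued only to burn off owed lag */
		bool on_rq;
	};

	static struct toy_entity tasks[] = {
		{ "delayed-A",  true,  true },
		{ "runnable-B", false, true },
	};

	/* Stand-in for pick_eevdf(): return the first enqueued entity. */
	static struct toy_entity *toy_pick(void)
	{
		for (size_t i = 0; i < sizeof(tasks) / sizeof(tasks[0]); i++) {
			if (tasks[i].on_rq)
				return &tasks[i];
		}
		return NULL;
	}

	/* Stand-in for dequeue_entities(.., DEQUEUE_SLEEP | DEQUEUE_DELAYED). */
	static void toy_finish_delayed_dequeue(struct toy_entity *se)
	{
		se->sched_delayed = false;
		se->on_rq = false;
	}

	/* Mirrors the 'goto again' flow in pick_next_task_fair(). */
	static struct toy_entity *toy_pick_next(void)
	{
		struct toy_entity *se;
	again:
		se = toy_pick();
		if (se && se->sched_delayed) {
			/* Delayed dequeue ends at pick time: dequeue, then repick. */
			toy_finish_delayed_dequeue(se);
			goto again;
		}
		return se;
	}

	int main(void)
	{
		struct toy_entity *se = toy_pick_next();

		printf("picked: %s\n", se ? se->name : "(none)");	/* runnable-B */
		return 0;
	}

In the actual patch the restart is driven by pick_next_entity() returning
NULL into the pre-existing 'goto again' loop, which is only safe because
the earlier patch made the pick state-invariant: redoing the pick after
the dequeue observes a consistent runqueue.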