The following commit has been merged into the sched/core branch of tip:

Commit-ID:     781773e3b68031bd001c0c18aa72e8470c225ebd
Gitweb:        https://git.kernel.org/tip/781773e3b68031bd001c0c18aa72e8470c225ebd
Author:        Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate:    Thu, 23 May 2024 11:57:43 +02:00
Committer:     Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Sat, 17 Aug 2024 11:06:43 +02:00

sched/fair: Implement ENQUEUE_DELAYED

Doing a wakeup on a delayed dequeue task is about as simple as it
sounds -- remove the delayed mark and enjoy the fact it was actually
still on the runqueue.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Reviewed-by: Valentin Schneider <vschneid@xxxxxxxxxx>
Tested-by: Valentin Schneider <vschneid@xxxxxxxxxx>
Link: https://lkml.kernel.org/r/20240727105029.888107381@xxxxxxxxxxxxx
---
 kernel/sched/fair.c | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a4f1f79..25b14df 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5290,6 +5290,9 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq);
 static inline bool cfs_bandwidth_used(void);
 
 static void
+requeue_delayed_entity(struct sched_entity *se);
+
+static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 	bool curr = cfs_rq->curr == se;
@@ -5922,8 +5925,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	for_each_sched_entity(se) {
 		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
 
-		if (se->on_rq)
+		if (se->on_rq) {
+			SCHED_WARN_ON(se->sched_delayed);
 			break;
+		}
 		enqueue_entity(qcfs_rq, se, ENQUEUE_WAKEUP);
 
 		if (cfs_rq_is_idle(group_cfs_rq(se)))
@@ -6773,6 +6778,22 @@ static int sched_idle_cpu(int cpu)
 }
 #endif
 
+static void
+requeue_delayed_entity(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	/*
+	 * se->sched_delayed should imply: se->on_rq == 1.
+	 * Because a delayed entity is one that is still on
+	 * the runqueue competing until elegibility.
+	 */
+	SCHED_WARN_ON(!se->sched_delayed);
+	SCHED_WARN_ON(!se->on_rq);
+
+	se->sched_delayed = 0;
+}
+
 /*
  * The enqueue_task method is called before nr_running is
  * increased. Here we update the fair scheduling stats and
@@ -6787,6 +6808,11 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	int task_new = !(flags & ENQUEUE_WAKEUP);
 	int rq_h_nr_running = rq->cfs.h_nr_running;
 
+	if (flags & ENQUEUE_DELAYED) {
+		requeue_delayed_entity(se);
+		return;
+	}
+
 	/*
 	 * The code below (indirectly) updates schedutil which looks at
 	 * the cfs_rq utilization to select a frequency.
@@ -6804,8 +6830,11 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
 
 	for_each_sched_entity(se) {
-		if (se->on_rq)
+		if (se->on_rq) {
+			if (se->sched_delayed)
+				requeue_delayed_entity(se);
 			break;
+		}
 		cfs_rq = cfs_rq_of(se);
 		enqueue_entity(cfs_rq, se, flags);
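
For reference, the waker side of this is expected to pass ENQUEUE_DELAYED when
it finds a delayed-dequeue task still on the runqueue; that call site is wired
up by other patches in the series, so the fragment below is only a rough
sketch of the idea under that assumption (task_on_rq_queued() and
enqueue_task() are the existing core-scheduler helpers; locking and the
preemption check are omitted):

	/*
	 * Illustrative sketch, not the merged call site: a wakeup that
	 * finds the task still queued but marked sched_delayed simply
	 * re-enqueues it with ENQUEUE_DELAYED, which short-circuits
	 * into requeue_delayed_entity() above instead of doing a full
	 * dequeue/enqueue cycle.
	 */
	if (task_on_rq_queued(p) && p->se.sched_delayed)
		enqueue_task(rq, p, ENQUEUE_DELAYED);

In the fair class that flag only ends up clearing se->sched_delayed: the
entity never left the runqueue, so there is no accounting to redo.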