The following commit has been merged into the sched/core branch of tip:

Commit-ID:     fab4a808ba9fb59b691d7096eed9b1494812ffd6
Gitweb:        https://git.kernel.org/tip/fab4a808ba9fb59b691d7096eed9b1494812ffd6
Author:        Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate:    Wed, 03 Apr 2024 09:50:41 +02:00
Committer:     Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Sat, 17 Aug 2024 11:06:41 +02:00

sched/fair: Re-organize dequeue_task_fair()

Working towards delaying dequeue, notably also inside the hierarchy,
rework dequeue_task_fair() such that it can 'resume' an interrupted
hierarchy walk.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Reviewed-by: Valentin Schneider <vschneid@xxxxxxxxxx>
Tested-by: Valentin Schneider <vschneid@xxxxxxxxxx>
Link: https://lkml.kernel.org/r/20240727105028.977256873@xxxxxxxxxxxxx
---
 kernel/sched/fair.c | 62 +++++++++++++++++++++++++++++---------------
 1 file changed, 41 insertions(+), 21 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 03f76b3..59b00d7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6861,34 +6861,43 @@ enqueue_throttle:
 static void set_next_buddy(struct sched_entity *se);
 
 /*
- * The dequeue_task method is called before nr_running is
- * decreased. We remove the task from the rbtree and
- * update the fair scheduling stats:
+ * Basically dequeue_task_fair(), except it can deal with dequeue_entity()
+ * failing half-way through and resume the dequeue later.
+ *
+ * Returns:
+ * -1 - dequeue delayed
+ *  0 - dequeue throttled
+ *  1 - dequeue complete
  */
-static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 {
-	struct cfs_rq *cfs_rq;
-	struct sched_entity *se = &p->se;
-	int task_sleep = flags & DEQUEUE_SLEEP;
-	int idle_h_nr_running = task_has_idle_policy(p);
 	bool was_sched_idle = sched_idle_rq(rq);
 	int rq_h_nr_running = rq->cfs.h_nr_running;
+	bool task_sleep = flags & DEQUEUE_SLEEP;
+	struct task_struct *p = NULL;
+	int idle_h_nr_running = 0;
+	int h_nr_running = 0;
+	struct cfs_rq *cfs_rq;
 
-	util_est_dequeue(&rq->cfs, p);
+	if (entity_is_task(se)) {
+		p = task_of(se);
+		h_nr_running = 1;
+		idle_h_nr_running = task_has_idle_policy(p);
+	}
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, flags);
 
-		cfs_rq->h_nr_running--;
+		cfs_rq->h_nr_running -= h_nr_running;
 		cfs_rq->idle_h_nr_running -= idle_h_nr_running;
 
 		if (cfs_rq_is_idle(cfs_rq))
-			idle_h_nr_running = 1;
+			idle_h_nr_running = h_nr_running;
 
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
-			goto dequeue_throttle;
+			return 0;
 
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
@@ -6912,20 +6921,18 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		se_update_runnable(se);
 		update_cfs_group(se);
 
-		cfs_rq->h_nr_running--;
+		cfs_rq->h_nr_running -= h_nr_running;
 		cfs_rq->idle_h_nr_running -= idle_h_nr_running;
 
 		if (cfs_rq_is_idle(cfs_rq))
-			idle_h_nr_running = 1;
+			idle_h_nr_running = h_nr_running;
 
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
-			goto dequeue_throttle;
-
+			return 0;
 	}
 
-	/* At this point se is NULL and we are at root level*/
-	sub_nr_running(rq, 1);
+	sub_nr_running(rq, h_nr_running);
 
 	if (rq_h_nr_running && !rq->cfs.h_nr_running)
 		dl_server_stop(&rq->fair_server);
@@ -6934,10 +6941,23 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
 		rq->next_balance = jiffies;
 
-dequeue_throttle:
-	util_est_update(&rq->cfs, p, task_sleep);
-	hrtick_update(rq);
+	return 1;
+}
+
+/*
+ * The dequeue_task method is called before nr_running is
+ * decreased. We remove the task from the rbtree and
+ * update the fair scheduling stats:
+ */
+static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+{
+	util_est_dequeue(&rq->cfs, p);
+
+	if (dequeue_entities(rq, &p->se, flags) < 0)
+		return false;
 
+	util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
+	hrtick_update(rq);
 	return true;
 }
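For illustration, the tri-state contract introduced above can be condensed
into a stand-alone user-space sketch. This is not kernel code: struct
entity, its fields, and the toy dequeue_task() wrapper below are
hypothetical stand-ins for the sched_entity/cfs_rq hierarchy, throttling,
and the upcoming delayed-dequeue state; only the -1/0/1 convention and the
caller's "< 0 means still queued" check mirror the patch.

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>

	/* Hypothetical stand-in for one sched_entity/cfs_rq level. */
	struct entity {
		struct entity *parent;
		int h_nr_running;	/* tasks queued at or below this level */
		bool throttled;		/* stand-in for cfs_rq_throttled() */
		bool delayed;		/* stand-in for a delayed dequeue */
	};

	/*
	 * Walk from a task's entity towards the root, mirroring the
	 * return convention of the new dequeue_entities():
	 *  -1 - dequeue delayed (walk interrupted, to be resumed later)
	 *   0 - dequeue throttled (stopped at a throttled level)
	 *   1 - dequeue complete (reached the root)
	 */
	static int dequeue_entities(struct entity *se)
	{
		for (; se; se = se->parent) {
			if (se->delayed)
				return -1;

			se->h_nr_running--;

			/* end evaluation on encountering a throttled level */
			if (se->throttled)
				return 0;
		}
		return 1;
	}

	/* Only the delayed case reports 'not dequeued' to the caller. */
	static bool dequeue_task(struct entity *se)
	{
		return dequeue_entities(se) >= 0;
	}

	int main(void)
	{
		struct entity root  = { .parent = NULL,   .h_nr_running = 2 };
		struct entity group = { .parent = &root,  .h_nr_running = 1 };
		struct entity task  = { .parent = &group, .h_nr_running = 1 };

		printf("dequeued: %s\n", dequeue_task(&task) ? "yes" : "no");
		printf("root h_nr_running: %d\n", root.h_nr_running);
		return 0;
	}

The split in the patch buys exactly this separation: dequeue_entities()
owns the hierarchy walk and its early exits, while dequeue_task_fair()
only cares whether the task is still queued (the -1 case) and keeps the
util_est and hrtick bookkeeping at the outer level.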