The following commit has been merged into the sched/core branch of tip:

Commit-ID:     3b3dd89b8bb0f03657859c22c86c19224f778638
Gitweb:        https://git.kernel.org/tip/3b3dd89b8bb0f03657859c22c86c19224f778638
Author:        Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate:    Wed, 03 Apr 2024 09:50:16 +02:00
Committer:     Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Sat, 17 Aug 2024 11:06:41 +02:00

sched/fair: Unify pick_{,next_}_task_fair()

Implement pick_next_task_fair() in terms of pick_task_fair() to
de-duplicate the pick loop. More importantly, this makes all the pick
loops use the state-invariant form, which is useful to introduce
further re-try conditions in later patches.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Reviewed-by: Valentin Schneider <vschneid@xxxxxxxxxx>
Tested-by: Valentin Schneider <vschneid@xxxxxxxxxx>
Link: https://lkml.kernel.org/r/20240727105028.725062368@xxxxxxxxxxxxx
---
 kernel/sched/fair.c | 60 +++++---------------------------------------
 1 file changed, 8 insertions(+), 52 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 175ccec..1452c53 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8451,7 +8451,6 @@ preempt:
 	resched_curr(rq);
 }
 
-#ifdef CONFIG_SMP
 static struct task_struct *pick_task_fair(struct rq *rq)
 {
 	struct sched_entity *se;
@@ -8463,7 +8462,7 @@ again:
 		return NULL;
 
 	do {
-		/* When we pick for a remote RQ, we'll not have done put_prev_entity() */
+		/* Might not have done put_prev_entity() */
 		if (cfs_rq->curr && cfs_rq->curr->on_rq)
 			update_curr(cfs_rq);
 
@@ -8484,19 +8483,19 @@ again:
 
 	return task_of(se);
 }
-#endif
 
 struct task_struct *
 pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
-	struct cfs_rq *cfs_rq = &rq->cfs;
 	struct sched_entity *se;
 	struct task_struct *p;
 	int new_tasks;
 
 again:
-	if (!sched_fair_runnable(rq))
+	p = pick_task_fair(rq);
+	if (!p)
 		goto idle;
+	se = &p->se;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (!prev || prev->sched_class != &fair_sched_class)
@@ -8508,52 +8507,14 @@ again:
 	 *
 	 * Therefore attempt to avoid putting and setting the entire cgroup
 	 * hierarchy, only change the part that actually changes.
-	 */
-
-	do {
-		struct sched_entity *curr = cfs_rq->curr;
-
-		/*
-		 * Since we got here without doing put_prev_entity() we also
-		 * have to consider cfs_rq->curr. If it is still a runnable
-		 * entity, update_curr() will update its vruntime, otherwise
-		 * forget we've ever seen it.
-		 */
-		if (curr) {
-			if (curr->on_rq)
-				update_curr(cfs_rq);
-			else
-				curr = NULL;
-
-			/*
-			 * This call to check_cfs_rq_runtime() will do the
-			 * throttle and dequeue its entity in the parent(s).
-			 * Therefore the nr_running test will indeed
-			 * be correct.
-			 */
-			if (unlikely(check_cfs_rq_runtime(cfs_rq))) {
-				cfs_rq = &rq->cfs;
-
-				if (!cfs_rq->nr_running)
-					goto idle;
-
-				goto simple;
-			}
-		}
-
-		se = pick_next_entity(cfs_rq);
-		cfs_rq = group_cfs_rq(se);
-	} while (cfs_rq);
-
-	p = task_of(se);
-
-	/*
+	 *
 	 * Since we haven't yet done put_prev_entity and if the selected task
 	 * is a different task than we started out with, try and touch the
 	 * least amount of cfs_rqs.
 	 */
 	if (prev != p) {
 		struct sched_entity *pse = &prev->se;
+		struct cfs_rq *cfs_rq;
 
 		while (!(cfs_rq = is_same_group(se, pse))) {
 			int se_depth = se->depth;
@@ -8579,13 +8540,8 @@ simple:
 	if (prev)
 		put_prev_task(rq, prev);
 
-	do {
-		se = pick_next_entity(cfs_rq);
-		set_next_entity(cfs_rq, se);
-		cfs_rq = group_cfs_rq(se);
-	} while (cfs_rq);
-
-	p = task_of(se);
+	for_each_sched_entity(se)
+		set_next_entity(cfs_rq_of(se), se);
 
 done: __maybe_unused;
 #ifdef CONFIG_SMP
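
For orientation, here is the rough shape of pick_next_task_fair() once the
hunks above are applied. This is a simplified sketch pieced together from
this diff, not verbatim kernel source: the is_same_group() depth walk, the
done: bookkeeping and the idle/new_tasks handling are elided.

struct task_struct *
pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
	struct sched_entity *se;
	struct task_struct *p;

again:
	/* The one remaining pick loop; returns NULL if nothing is runnable. */
	p = pick_task_fair(rq);
	if (!p)
		goto idle;
	se = &p->se;

#ifdef CONFIG_FAIR_GROUP_SCHED
	if (!prev || prev->sched_class != &fair_sched_class)
		goto simple;

	/*
	 * put_prev_entity() has not been done yet, so only switch the part
	 * of the cgroup hierarchy that actually differs between prev and p.
	 */
	if (prev != p) {
		/* ... walk se/pse until is_same_group(), doing
		 *     put_prev_entity() / set_next_entity() per level ... */
	}

	goto done;
simple:
#endif
	if (prev)
		put_prev_task(rq, prev);

	/* Mark the picked entity as current at each level of the hierarchy. */
	for_each_sched_entity(se)
		set_next_entity(cfs_rq_of(se), se);

done: __maybe_unused;
	/* ... */
	return p;

idle:
	/* ... try new-idle balancing; may loop back to 'again' ... */
	return NULL;
}

The point of the unification is visible here: the
do { se = pick_next_entity(cfs_rq); ... } while (cfs_rq) descent no longer
appears in either path of pick_next_task_fair(). It lives only in
pick_task_fair(), so later patches can add re-try conditions to a single,
state-invariant pick loop.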