The following commit has been merged into the sched/core branch of tip:

Commit-ID:     faa42d29419def58d3c3e5b14ad4037f0af3b496
Gitweb:        https://git.kernel.org/tip/faa42d29419def58d3c3e5b14ad4037f0af3b496
Author:        Tianchen Ding <dtcccc@xxxxxxxxxxxxxxxxx>
AuthorDate:    Wed, 26 Jun 2024 10:35:05 +08:00
Committer:     Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Mon, 29 Jul 2024 12:22:35 +02:00

sched/fair: Make SCHED_IDLE entity be preempted in strict hierarchy

Consider the following cgroup:

                   root
                    |
       ------------------------
       |                      |
  normal_cgroup          idle_cgroup
       |                      |
SCHED_IDLE task_A      SCHED_NORMAL task_B

According to the cgroup hierarchy, A should preempt B. But current
check_preempt_wakeup_fair() treats cgroup se and task separately, so B
will preempt A unexpectedly.

Unify the wakeup logic by {c,p}se_is_idle only. This makes SCHED_IDLE
of a task a relative policy that is effective only within its own
cgroup, similar to the behavior of NICE.

Also fix se_is_idle() definition when !CONFIG_FAIR_GROUP_SCHED.

Fixes: 304000390f88 ("sched: Cgroup SCHED_IDLE support")
Signed-off-by: Tianchen Ding <dtcccc@xxxxxxxxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Reviewed-by: Josh Don <joshdon@xxxxxxxxxx>
Reviewed-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Link: https://lkml.kernel.org/r/20240626023505.1332596-1-dtcccc@xxxxxxxxxxxxxxxxx
---
 kernel/sched/fair.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 02694fc..99c80ab 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -511,7 +511,7 @@ static int cfs_rq_is_idle(struct cfs_rq *cfs_rq)
 
 static int se_is_idle(struct sched_entity *se)
 {
-	return 0;
+	return task_has_idle_policy(task_of(se));
 }
 
 #endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -8381,16 +8381,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
 	if (test_tsk_need_resched(curr))
 		return;
 
-	/* Idle tasks are by definition preempted by non-idle tasks. */
-	if (unlikely(task_has_idle_policy(curr)) &&
-	    likely(!task_has_idle_policy(p)))
-		goto preempt;
-
-	/*
-	 * Batch and idle tasks do not preempt non-idle tasks (their preemption
-	 * is driven by the tick):
-	 */
-	if (unlikely(p->policy != SCHED_NORMAL) || !sched_feat(WAKEUP_PREEMPTION))
+	if (!sched_feat(WAKEUP_PREEMPTION))
 		return;
 
 	find_matching_se(&se, &pse);
@@ -8400,7 +8391,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
 	pse_is_idle = se_is_idle(pse);
 
 	/*
-	 * Preempt an idle group in favor of a non-idle group (and don't preempt
+	 * Preempt an idle entity in favor of a non-idle entity (and don't preempt
 	 * in the inverse case).
 	 */
 	if (cse_is_idle && !pse_is_idle)
@@ -8408,9 +8399,14 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
 	if (cse_is_idle != pse_is_idle)
 		return;
 
+	/*
+	 * BATCH and IDLE tasks do not preempt others.
+	 */
+	if (unlikely(p->policy != SCHED_NORMAL))
+		return;
+
 	cfs_rq = cfs_rq_of(se);
 	update_curr(cfs_rq);
-
 	/*
 	 * XXX pick_eevdf(cfs_rq) != se ?
 	 */
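
For readers who want to see the resulting policy in isolation, here is a
minimal standalone userspace sketch (plain C, not kernel code) of the
decision this patch establishes. The struct entity, find_matching() and
wakeup_preempt() names below are illustrative stand-ins for the kernel's
sched_entity, find_matching_se() and check_preempt_wakeup_fair(); only the
idle comparison at the common hierarchy level mirrors the patch, and the
eevdf/vruntime tie-break is elided.

/*
 * Userspace sketch of the post-patch wakeup-preemption decision.
 * All names are illustrative, not the kernel's actual structures.
 */
#include <stdbool.h>
#include <stdio.h>

struct entity {
	struct entity *parent;	/* NULL for top-level entities */
	int depth;		/* top-level entities have depth 0 */
	bool is_idle;		/* SCHED_IDLE task, or idle cgroup */
};

/* Climb both hierarchies until the entities share a parent
 * (cf. find_matching_se()). */
static void find_matching(struct entity **se, struct entity **pse)
{
	while ((*se)->depth > (*pse)->depth)
		*se = (*se)->parent;
	while ((*pse)->depth > (*se)->depth)
		*pse = (*pse)->parent;
	while ((*se)->parent != (*pse)->parent) {
		*se = (*se)->parent;
		*pse = (*pse)->parent;
	}
}

/* Should the waking entity 'pse' preempt the running entity 'se'? */
static bool wakeup_preempt(struct entity *se, struct entity *pse)
{
	find_matching(&se, &pse);
	if (se->is_idle && !pse->is_idle)
		return true;	/* idle loses to non-idle at the same level */
	if (se->is_idle != pse->is_idle)
		return false;	/* non-idle is never preempted by idle */
	return false;		/* same class: eevdf pick decides, elided */
}

int main(void)
{
	struct entity normal_cg = { .parent = NULL, .depth = 0, .is_idle = false };
	struct entity idle_cg   = { .parent = NULL, .depth = 0, .is_idle = true  };
	/* task_A is SCHED_IDLE but lives in the normal cgroup... */
	struct entity task_a = { .parent = &normal_cg, .depth = 1, .is_idle = true  };
	/* ...while task_B is SCHED_NORMAL inside the idle cgroup. */
	struct entity task_b = { .parent = &idle_cg,   .depth = 1, .is_idle = false };

	/* B is running, A wakes: compared at the cgroup level,
	 * normal_cgroup beats idle_cgroup. */
	printf("A preempts B: %s\n",
	       wakeup_preempt(&task_b, &task_a) ? "yes" : "no");
	return 0;
}

Compiled and run, this prints "A preempts B: yes": task_A's SCHED_IDLE only
matters relative to siblings within normal_cgroup, so where the two
hierarchies meet, the non-idle normal_cgroup entity preempts the idle
idle_cgroup entity. Before the patch, the early task-level
task_has_idle_policy() check fired first and B preempted A instead.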