6.6-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Tianchen Ding <dtcccc@xxxxxxxxxxxxxxxxx>

[ Upstream commit faa42d29419def58d3c3e5b14ad4037f0af3b496 ]

Consider the following cgroup:

                       root
                        |
             ------------------------
             |                      |
       normal_cgroup            idle_cgroup
             |                      |
   SCHED_IDLE task_A           SCHED_NORMAL task_B

According to the cgroup hierarchy, A should preempt B. But current
check_preempt_wakeup_fair() treats cgroup se and task separately, so B
will preempt A unexpectedly.
Unify the wakeup logic by {c,p}se_is_idle only. This makes SCHED_IDLE of
a task a relative policy that is effective only within its own cgroup,
similar to the behavior of NICE.

Also fix se_is_idle() definition when !CONFIG_FAIR_GROUP_SCHED.

Fixes: 304000390f88 ("sched: Cgroup SCHED_IDLE support")
Signed-off-by: Tianchen Ding <dtcccc@xxxxxxxxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Reviewed-by: Josh Don <joshdon@xxxxxxxxxx>
Reviewed-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Link: https://lkml.kernel.org/r/20240626023505.1332596-1-dtcccc@xxxxxxxxxxxxxxxxx
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
 kernel/sched/fair.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b2e1009e5706e..5fc0d9cc9d9d7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -533,7 +533,7 @@ static int cfs_rq_is_idle(struct cfs_rq *cfs_rq)
 
 static int se_is_idle(struct sched_entity *se)
 {
-	return 0;
+	return task_has_idle_policy(task_of(se));
 }
 
 #endif	/* CONFIG_FAIR_GROUP_SCHED */
@@ -8209,16 +8209,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	if (test_tsk_need_resched(curr))
 		return;
 
-	/* Idle tasks are by definition preempted by non-idle tasks. */
-	if (unlikely(task_has_idle_policy(curr)) &&
-	    likely(!task_has_idle_policy(p)))
-		goto preempt;
-
-	/*
-	 * Batch and idle tasks do not preempt non-idle tasks (their preemption
-	 * is driven by the tick):
-	 */
-	if (unlikely(p->policy != SCHED_NORMAL) || !sched_feat(WAKEUP_PREEMPTION))
+	if (!sched_feat(WAKEUP_PREEMPTION))
 		return;
 
 	find_matching_se(&se, &pse);
@@ -8228,7 +8219,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	pse_is_idle = se_is_idle(pse);
 
 	/*
-	 * Preempt an idle group in favor of a non-idle group (and don't preempt
+	 * Preempt an idle entity in favor of a non-idle entity (and don't preempt
 	 * in the inverse case).
 	 */
 	if (cse_is_idle && !pse_is_idle)
@@ -8236,9 +8227,14 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	if (cse_is_idle != pse_is_idle)
 		return;
 
+	/*
+	 * BATCH and IDLE tasks do not preempt others.
+	 */
+	if (unlikely(p->policy != SCHED_NORMAL))
+		return;
+
 	cfs_rq = cfs_rq_of(se);
 	update_curr(cfs_rq);
-
 	/*
 	 * XXX pick_eevdf(cfs_rq) != se ?
 	 */
-- 
2.43.0