The following commit has been merged into the sched/core branch of tip:

Commit-ID:     eff6c8ce8d4d7faef75f66614dd20bb50595d261
Gitweb:        https://git.kernel.org/tip/eff6c8ce8d4d7faef75f66614dd20bb50595d261
Author:        wuchi <wuchi.zero@xxxxxxxxx>
AuthorDate:    Tue, 21 Mar 2023 14:44:59 +08:00
Committer:     Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Wed, 22 Mar 2023 10:10:58 +01:00

sched/core: Reduce cost of sched_move_task when config autogroup

When autogroup is enabled, some sched_move_task() calls are pointless
because task_struct->sched_task_group may be unchanged (it already
equals the task_group of the cpu cgroup). So add a check in
sched_move_task() and bail out early in that case.

For example: task A belongs to cpu_cgroup0 and autogroup0. It will
still belong to cpu_cgroup0 at do_exit() time, so there is no need to
{de,en}queue it. The call graph is as follows:

  do_exit()
    sched_autogroup_exit_task()
      sched_move_task()
        dequeue_task()
        sched_change_group()
          A.sched_task_group = sched_get_task_group()   (= cpu_cgroup0)
        enqueue_task()

Performance results:
===========================
1. env
	cpu: bogomips=4600.00
	kernel: 6.3.0-rc3
	cpu_cgroup: 6:cpu,cpuacct:/user.slice

2. cmds
	do_exit script:

	for i in {0..10000}; do
		sleep 0 &
	done
	wait

	Run the above script, then use the following bpftrace command to
	get the cost of sched_move_task (a combined harness sketch is
	appended after the diff):

	bpftrace -e 'k:sched_move_task { @ts[tid] = nsecs; } kr:sched_move_task /@ts[tid]/ { @ns += nsecs - @ts[tid]; delete(@ts[tid]); }'

3. cost time(ns):
	without patch: 43528033
	with    patch: 18541416
	diff         : -24986617  (-57.4%)

As the results show, the patch saves 57.4% of the sched_move_task cost
in this scenario.

Signed-off-by: wuchi <wuchi.zero@xxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Link: https://lkml.kernel.org/r/20230321064459.39421-1-wuchi.zero@xxxxxxxxx
---
 kernel/sched/core.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9140a33..5ddd961 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10351,7 +10351,7 @@ void sched_release_group(struct task_group *tg)
 	spin_unlock_irqrestore(&task_group_lock, flags);
 }
 
-static void sched_change_group(struct task_struct *tsk)
+static struct task_group *sched_get_task_group(struct task_struct *tsk)
 {
 	struct task_group *tg;
 
@@ -10363,7 +10363,13 @@ static void sched_change_group(struct task_struct *tsk)
 	tg = container_of(task_css_check(tsk, cpu_cgrp_id, true),
 			  struct task_group, css);
 	tg = autogroup_task_group(tsk, tg);
-	tsk->sched_task_group = tg;
+
+	return tg;
+}
+
+static void sched_change_group(struct task_struct *tsk, struct task_group *group)
+{
+	tsk->sched_task_group = group;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (tsk->sched_class->task_change_group)
@@ -10384,10 +10390,19 @@ void sched_move_task(struct task_struct *tsk)
 {
 	int queued, running, queue_flags =
 		DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+	struct task_group *group;
 	struct rq_flags rf;
 	struct rq *rq;
 
 	rq = task_rq_lock(tsk, &rf);
+	/*
+	 * Esp. with SCHED_AUTOGROUP enabled it is possible to get superfluous
+	 * group changes.
+	 */
+	group = sched_get_task_group(tsk);
+	if (group == tsk->sched_task_group)
+		goto unlock;
+
 	update_rq_clock(rq);
 
 	running = task_current(rq, tsk);
@@ -10398,7 +10413,7 @@ void sched_move_task(struct task_struct *tsk)
 	if (running)
 		put_prev_task(rq, tsk);
 
-	sched_change_group(tsk);
+	sched_change_group(tsk, group);
 
 	if (queued)
 		enqueue_task(rq, tsk, queue_flags);
@@ -10412,6 +10427,7 @@ void sched_move_task(struct task_struct *tsk)
 		resched_curr(rq);
 	}
 
+unlock:
 	task_rq_unlock(rq, tsk, &rf);
 }
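
For reference, here is a minimal, self-contained harness that combines the
two commands from the commit message. The bpftrace one-liner and the
"sleep 0" loop are taken from above; the surrounding shell glue (backgrounding
bpftrace, the settle delay, the SIGINT to dump the sum) is an assumption about
how the measurement was driven, not part of the original patch:

	#!/bin/bash
	# Sum the time spent inside sched_move_task() while an exit-heavy
	# workload runs; bpftrace prints @ns (total nanoseconds) on SIGINT.
	bpftrace -e 'k:sched_move_task { @ts[tid] = nsecs; }
	             kr:sched_move_task /@ts[tid]/ { @ns += nsecs - @ts[tid]; delete(@ts[tid]); }' &
	bpf_pid=$!
	sleep 2		# give bpftrace time to attach its probes

	# do_exit workload: each "sleep 0" exits almost immediately, so every
	# task goes through sched_autogroup_exit_task() -> sched_move_task().
	(
		for i in {0..10000}; do
			sleep 0 &
		done
		wait
	)

	kill -INT "$bpf_pid"	# make bpftrace print the accumulated @ns
	wait "$bpf_pid"

Running this once on an unpatched and once on a patched kernel and comparing
the printed @ns totals reproduces the before/after comparison in the
"Performance results" section.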