The following commit has been merged into the sched/core branch of tip: Commit-ID: 7e2ce158699bb7b6a489c7c1d89c0dde2d4ceef5 Gitweb: https://git.kernel.org/tip/7e2ce158699bb7b6a489c7c1d89c0dde2d4ceef5 Author: Josh Don <joshdon@xxxxxxxxxx> AuthorDate: Thu, 19 Aug 2021 18:04:02 -07:00 Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx> CommitterDate: Thu, 09 Sep 2021 11:27:31 +02:00 sched: reduce sched slice for SCHED_IDLE entities Use a small, non-scaled min granularity for SCHED_IDLE entities, when competing with normal entities. This reduces the latency of getting a normal entity back on cpu, at the expense of increased context switch frequency of SCHED_IDLE entities. The benefit of this change is to reduce the round-robin latency for normal entities when competing with a SCHED_IDLE entity. Example: on a machine with HZ=1000, spawned two threads, one of which is SCHED_IDLE, and affined to one cpu. Without this patch, the SCHED_IDLE thread runs for 4ms then waits for 1.4s. With this patch, it runs for 1ms and waits 340ms (as it round-robins with the other thread). Signed-off-by: Josh Don <joshdon@xxxxxxxxxx> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Reviewed-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx> Link: https://lore.kernel.org/r/20210820010403.946838-4-joshdon@xxxxxxxxxx --- kernel/sched/debug.c | 2 ++ kernel/sched/fair.c | 29 ++++++++++++++++++++++++----- kernel/sched/sched.h | 1 + 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 3353857..317ef56 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -305,6 +305,7 @@ static __init int sched_init_debug(void) debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency); debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity); + debugfs_create_u32("idle_min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_idle_min_granularity); debugfs_create_u32("wakeup_granularity_ns", 0644, debugfs_sched, &sysctl_sched_wakeup_granularity); debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms); @@ -806,6 +807,7 @@ static void sched_debug_header(struct seq_file *m) SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) PN(sysctl_sched_latency); PN(sysctl_sched_min_granularity); + PN(sysctl_sched_idle_min_granularity); PN(sysctl_sched_wakeup_granularity); P(sysctl_sched_child_runs_first); P(sysctl_sched_features); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d7c0b9d..7330a77 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -60,6 +60,14 @@ unsigned int sysctl_sched_min_granularity = 750000ULL; static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; /* + * Minimal preemption granularity for CPU-bound SCHED_IDLE tasks. + * Applies only when SCHED_IDLE tasks compete with normal tasks. + * + * (default: 0.75 msec) + */ +unsigned int sysctl_sched_idle_min_granularity = 750000ULL; + +/* * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity */ static unsigned int sched_nr_latency = 8; @@ -665,6 +673,8 @@ static u64 __sched_period(unsigned long nr_running) return sysctl_sched_latency; } +static bool sched_idle_cfs_rq(struct cfs_rq *cfs_rq); + /* * We calculate the wall-time slice from the period by taking a part * proportional to the weight. @@ -674,6 +684,8 @@ static u64 __sched_period(unsigned long nr_running) static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) { unsigned int nr_running = cfs_rq->nr_running; + struct sched_entity *init_se = se; + unsigned int min_gran; u64 slice; if (sched_feat(ALT_PERIOD)) @@ -684,12 +696,13 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) for_each_sched_entity(se) { struct load_weight *load; struct load_weight lw; + struct cfs_rq *qcfs_rq; - cfs_rq = cfs_rq_of(se); - load = &cfs_rq->load; + qcfs_rq = cfs_rq_of(se); + load = &qcfs_rq->load; if (unlikely(!se->on_rq)) { - lw = cfs_rq->load; + lw = qcfs_rq->load; update_load_add(&lw, se->load.weight); load = &lw; @@ -697,8 +710,14 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) slice = __calc_delta(slice, se->load.weight, load); } - if (sched_feat(BASE_SLICE)) - slice = max(slice, (u64)sysctl_sched_min_granularity); + if (sched_feat(BASE_SLICE)) { + if (se_is_idle(init_se) && !sched_idle_cfs_rq(cfs_rq)) + min_gran = sysctl_sched_idle_min_granularity; + else + min_gran = sysctl_sched_min_granularity; + + slice = max_t(u64, slice, min_gran); + } return slice; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 413298d..6b2d8b7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2400,6 +2400,7 @@ extern const_debug unsigned int sysctl_sched_migration_cost; #ifdef CONFIG_SCHED_DEBUG extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; +extern unsigned int sysctl_sched_idle_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; extern int sysctl_resched_latency_warn_ms; extern int sysctl_resched_latency_warn_once;