A task can set its latency priority with sched_setattr(), which is then
used to set the latency offset of its sched_entity, but sched group
entities still have the default latency offset value.

Add a latency.nice field in the cpu cgroup controller to set the latency
priority of the group, similarly to sched_setattr(). The latency priority
is then used to set the offset of the sched_entities of the group.

Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Tested-by: K Prateek Nayak <kprateek.nayak@xxxxxxx>
---
 Documentation/admin-guide/cgroup-v2.rst | 10 ++++++++
 kernel/sched/core.c                     | 30 +++++++++++++++++++++++
 kernel/sched/fair.c                     | 32 +++++++++++++++++++++++++
 kernel/sched/sched.h                    |  4 ++++
 4 files changed, 76 insertions(+)
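
[ Note for reviewers, not part of the patch: a minimal usage sketch of the
  new interface file. The cgroup path and group name are illustrative and
  assume a cgroup v2 hierarchy mounted at /sys/fs/cgroup with an existing
  group "grp". Per the documentation below, the default value is "0" and
  writes outside [-20, 19] fail with -ERANGE. ]

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Mark the whole group as latency sensitive: -20 is the most
	 * latency-sensitive nice value, 19 the least. */
	int fd = open("/sys/fs/cgroup/grp/cpu.latency.nice", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* The kernel rejects values outside [-20, 19] with -ERANGE. */
	if (write(fd, "-20", 3) < 0)
		perror("write");
	close(fd);
	return 0;
}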
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 1b3ed1c3b3f1..c08424593e4a 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1121,6 +1121,16 @@ All time durations are in microseconds.
 	values similar to the sched_setattr(2). This maximum utilization
 	value is used to clamp the task specific maximum utilization clamp.
 
+  cpu.latency.nice
+	A read-write single value file which exists on non-root
+	cgroups. The default is "0".
+
+	The nice value is in the range [-20, 19].
+
+	This interface file allows reading and setting latency using the
+	same values used by sched_setattr(2). The latency_nice of a group is
+	used to limit the impact of the latency_nice of a task outside the
+	group.
 
 
 Memory
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d5b7e237d79b..093cc1af73dc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11059,6 +11059,25 @@ static int cpu_idle_write_s64(struct cgroup_subsys_state *css,
 {
 	return sched_group_set_idle(css_tg(css), idle);
 }
+
+static s64 cpu_latency_nice_read_s64(struct cgroup_subsys_state *css,
+				     struct cftype *cft)
+{
+	return LATENCY_TO_NICE(css_tg(css)->latency_prio);
+}
+
+static int cpu_latency_nice_write_s64(struct cgroup_subsys_state *css,
+				      struct cftype *cft, s64 nice)
+{
+	int prio;
+
+	if (nice < MIN_LATENCY_NICE || nice > MAX_LATENCY_NICE)
+		return -ERANGE;
+
+	prio = NICE_TO_LATENCY(nice);
+
+	return sched_group_set_latency(css_tg(css), prio);
+}
 #endif
 
 static struct cftype cpu_legacy_files[] = {
@@ -11073,6 +11092,11 @@ static struct cftype cpu_legacy_files[] = {
 		.read_s64 = cpu_idle_read_s64,
 		.write_s64 = cpu_idle_write_s64,
 	},
+	{
+		.name = "latency.nice",
+		.read_s64 = cpu_latency_nice_read_s64,
+		.write_s64 = cpu_latency_nice_write_s64,
+	},
 #endif
 #ifdef CONFIG_CFS_BANDWIDTH
 	{
@@ -11290,6 +11314,12 @@ static struct cftype cpu_files[] = {
 		.read_s64 = cpu_idle_read_s64,
 		.write_s64 = cpu_idle_write_s64,
 	},
+	{
+		.name = "latency.nice",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_s64 = cpu_latency_nice_read_s64,
+		.write_s64 = cpu_latency_nice_write_s64,
+	},
 #endif
 #ifdef CONFIG_CFS_BANDWIDTH
 	{
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 414b6243208b..dc7570f43ebe 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -12274,6 +12274,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		goto err;
 
 	tg->shares = NICE_0_LOAD;
+	tg->latency_prio = DEFAULT_LATENCY_PRIO;
 
 	init_cfs_bandwidth(tg_cfs_bandwidth(tg));
 
@@ -12372,6 +12373,9 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 	}
 
 	se->my_q = cfs_rq;
+
+	se->latency_offset = calc_latency_offset(tg->latency_prio);
+
 	/* guarantee group entities always have weight */
 	update_load_set(&se->load, NICE_0_LOAD);
 	se->parent = parent;
@@ -12502,6 +12506,34 @@ int sched_group_set_idle(struct task_group *tg, long idle)
 	return 0;
 }
 
+int sched_group_set_latency(struct task_group *tg, int prio)
+{
+	long latency_offset;
+	int i;
+
+	if (tg == &root_task_group)
+		return -EINVAL;
+
+	mutex_lock(&shares_mutex);
+
+	if (tg->latency_prio == prio) {
+		mutex_unlock(&shares_mutex);
+		return 0;
+	}
+
+	tg->latency_prio = prio;
+	latency_offset = calc_latency_offset(prio);
+
+	for_each_possible_cpu(i) {
+		struct sched_entity *se = tg->se[i];
+
+		WRITE_ONCE(se->latency_offset, latency_offset);
+	}
+
+	mutex_unlock(&shares_mutex);
+	return 0;
+}
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
 
 void free_fair_sched_group(struct task_group *tg) { }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3f42f86105d4..9a2e71231083 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -378,6 +378,8 @@ struct task_group {
 
 	/* A positive value indicates that this is a SCHED_IDLE group. */
 	int			idle;
+	/* latency priority of the group. */
+	int			latency_prio;
 
 #ifdef CONFIG_SMP
 	/*
@@ -488,6 +490,8 @@ extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
 
 extern int sched_group_set_idle(struct task_group *tg, long idle);
 
+extern int sched_group_set_latency(struct task_group *tg, int prio);
+
 #ifdef CONFIG_SMP
 extern void set_task_rq_fair(struct sched_entity *se,
 			     struct cfs_rq *prev, struct cfs_rq *next);
-- 
2.34.1
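
[ Appendix, not part of the patch: the changelog refers to the per-task
  side of this interface, set with sched_setattr(). A minimal sketch
  follows. The sched_attr layout extension and the names
  SCHED_FLAG_LATENCY_NICE / sched_latency_nice are assumptions based on
  earlier patches in this series and are not in mainline; glibc provides
  no wrapper for sched_setattr(), so the raw syscall is used. ]

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef SCHED_FLAG_LATENCY_NICE
#define SCHED_FLAG_LATENCY_NICE	0x80	/* assumed value from this series */
#endif

/* Local copy of sched_attr with the field this series is assumed to add. */
struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;
	uint64_t sched_deadline;
	uint64_t sched_period;
	uint32_t sched_util_min;
	uint32_t sched_util_max;
	int32_t  sched_latency_nice;	/* assumed new field */
};

int main(void)
{
	struct sched_attr attr = {
		.size = sizeof(attr),
		.sched_flags = SCHED_FLAG_LATENCY_NICE,
		.sched_latency_nice = -20,	/* most latency sensitive */
	};

	/* pid 0 means the calling task. */
	if (syscall(SYS_sched_setattr, 0, &attr, 0))
		perror("sched_setattr");
	return 0;
}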