We need to run RT threads in the cgroup unified hierarchy, but currently
we can't, because the default rt_bandwidth.rt_runtime of a non-root
task_group is 0 and there is no interface to update it. This patch adds
the RT bandwidth interface "cpu.max.rt" and updates the documentation
accordingly.

Signed-off-by: Chengming Zhou <zhouchengming@xxxxxxxxxxxxx>
---
v2:
 - minor fix for documentation per Randy's review. Thanks.
---
 Documentation/admin-guide/cgroup-v2.rst | 13 +++++++++++
 kernel/sched/core.c                     | 31 +++++++++++++++++++++++++
 kernel/sched/rt.c                       |  2 +-
 kernel/sched/sched.h                    |  1 +
 4 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 176298f2f4de..3d67366c99e1 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1055,6 +1055,19 @@ All time durations are in microseconds.
 
 	The burst in the range [0, $MAX].
 
+  cpu.max.rt
+	A read-write two-value file which exists on all cgroups when
+	CONFIG_RT_GROUP_SCHED is enabled, to control CPU bandwidth for
+	RT threads in the task group.
+
+	The maximum bandwidth limit.  It's in the following format::
+
+	  $MAX $PERIOD
+
+	which indicates that RT threads in the group may consume up to
+	$MAX in each $PERIOD duration.  "max" for $MAX indicates no
+	limit.  If only one number is written, $MAX is updated.
+
   cpu.pressure
 	A read-write nested-keyed file.
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index daadedc78fd9..c16f8cc5de08 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11047,6 +11047,30 @@ static ssize_t cpu_max_write(struct kernfs_open_file *of,
 }
 #endif
 
+#ifdef CONFIG_RT_GROUP_SCHED
+static int cpu_max_rt_show(struct seq_file *sf, void *v)
+{
+	struct task_group *tg = css_tg(seq_css(sf));
+
+	cpu_period_quota_print(sf, sched_group_rt_period(tg), sched_group_rt_runtime(tg));
+	return 0;
+}
+
+static ssize_t cpu_max_rt_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
+{
+	struct task_group *tg = css_tg(of_css(of));
+	u64 period = sched_group_rt_period(tg);
+	u64 runtime;
+	int ret;
+
+	ret = cpu_period_quota_parse(buf, &period, &runtime);
+	if (!ret)
+		ret = tg_set_rt_bandwidth(tg, period, runtime);
+	return ret ?: nbytes;
+}
+#endif
+
 static struct cftype cpu_files[] = {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	{
@@ -11082,6 +11106,13 @@ static struct cftype cpu_files[] = {
 		.write_u64 = cpu_cfs_burst_write_u64,
 	},
 #endif
+#ifdef CONFIG_RT_GROUP_SCHED
+	{
+		.name = "max.rt",
+		.seq_show = cpu_max_rt_show,
+		.write = cpu_max_rt_write,
+	},
+#endif
 #ifdef CONFIG_UCLAMP_TASK_GROUP
 	{
 		.name = "uclamp.min",
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8c9ed9664840..319ce586446f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2819,7 +2819,7 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 	return ret;
 }
 
-static int tg_set_rt_bandwidth(struct task_group *tg,
+int tg_set_rt_bandwidth(struct task_group *tg,
 		u64 rt_period, u64 rt_runtime)
 {
 	int i, err = 0;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7b19a72408b1..317480d535b0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -486,6 +486,7 @@ extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent
 extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
 		struct sched_rt_entity *rt_se, int cpu,
 		struct sched_rt_entity *parent);
+extern int tg_set_rt_bandwidth(struct task_group *tg, u64 rt_period, u64 rt_runtime);
 extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us);
 extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us);
 extern long sched_group_rt_runtime(struct task_group *tg);
-- 
2.36.1
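
For reference, a minimal user-space sketch of how the new cpu.max.rt file could
be exercised once this patch is applied. It assumes cgroup2 is mounted at
/sys/fs/cgroup and that a child cgroup named "rtgroup" already exists on a
kernel built with CONFIG_RT_GROUP_SCHED; the mount point, group name, and the
chosen values are only illustrative, not part of the patch.

/* sketch: limit RT threads in "rtgroup" to 950000us per 1000000us period */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* hypothetical path; adjust to the actual cgroup2 mount and group */
	const char *path = "/sys/fs/cgroup/rtgroup/cpu.max.rt";
	const char *limit = "950000 1000000\n";	/* "$MAX $PERIOD" in microseconds */
	char buf[64];
	ssize_t n;
	int fd;

	fd = open(path, O_WRONLY);
	if (fd < 0) {
		perror("open cpu.max.rt for write");
		return 1;
	}
	if (write(fd, limit, strlen(limit)) < 0)
		perror("write cpu.max.rt");
	close(fd);

	/* read the setting back to confirm it took effect */
	fd = open(path, O_RDONLY);
	if (fd < 0) {
		perror("open cpu.max.rt for read");
		return 1;
	}
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("cpu.max.rt: %s", buf);
	}
	close(fd);
	return 0;
}

Per the documentation hunk above, writing "max" for $MAX would remove the
limit, and writing a single number updates only $MAX, mirroring the cpu.max
convention.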