From: Lei Chen <lennychen@xxxxxxxxxxx> Introduce a new attribute "priority" to the control group, which subsystems can use as a scale to adjust the behavior of processes. The default value of "priority" is 0, which means the highest priority, and the total number of priority levels is defined by CGROUP_PRIORITY_MAX. Signed-off-by: Lei Chen <lennychen@xxxxxxxxxxx> Signed-off-by: Liu Yu <allanyuliu@xxxxxxxxxxx> Signed-off-by: Peng Zhiguang <zgpeng@xxxxxxxxxxx> Signed-off-by: Yulei Zhang <yuleixzhang@xxxxxxxxxxx> --- include/linux/cgroup-defs.h | 2 + include/linux/cgroup.h | 2 + kernel/cgroup/cgroup.c | 90 +++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 559ee05f8..3fa2f28a9 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -417,6 +417,7 @@ struct cgroup { u16 subtree_ss_mask; u16 old_subtree_control; u16 old_subtree_ss_mask; + u16 priority; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; @@ -640,6 +641,7 @@ struct cgroup_subsys { void (*exit)(struct task_struct *task); void (*release)(struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); + int (*css_priority_change)(struct cgroup_subsys_state *css, u16 old, u16 new); bool early_init:1; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4f2f79de0..734d51aba 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -47,6 +47,7 @@ struct kernel_clone_args; /* internal flags */ #define CSS_TASK_ITER_SKIPPED (1U << 16) +#define CGROUP_PRIORITY_MAX 8 /* a css_task_iter should be treated as an opaque object */ struct css_task_iter { @@ -957,5 +958,6 @@ static inline void cgroup_bpf_get(struct cgroup *cgrp) {} static inline void cgroup_bpf_put(struct cgroup *cgrp) {} #endif /* CONFIG_CGROUP_BPF */ +ssize_t cgroup_priority(struct cgroup_subsys_state *css); #endif /* 
_LINUX_CGROUP_H */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 9153b20e5..aa019ad24 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1892,6 +1892,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	cgrp->dom_cgrp = cgrp;
 	cgrp->max_descendants = INT_MAX;
 	cgrp->max_depth = INT_MAX;
+	cgrp->priority = 0;
 	INIT_LIST_HEAD(&cgrp->rstat_css_list);
 	prev_cputime_init(&cgrp->prev_cputime);
 
@@ -4783,6 +4784,88 @@ static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
 	return __cgroup_procs_write(of, buf, false) ?: nbytes;
 }
 
+/* Show the cgroup's priority level; 0 is the highest priority. */
+static int cgroup_priority_show(struct seq_file *seq, void *v)
+{
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+	seq_printf(seq, "%d\n", cgrp->priority);
+
+	return 0;
+}
+
+/* Update @cgrp->priority and run css_priority_change() hooks (results ignored). */
+static void cgroup_set_priority(struct cgroup *cgrp, unsigned int priority)
+{
+	u16 old = cgrp->priority;
+	struct cgroup_subsys_state *css;
+	int ssid;
+
+	cgrp->priority = priority;
+	for_each_css(css, ssid, cgrp) {
+		if (css->ss->css_priority_change)
+			css->ss->css_priority_change(css, old, priority);
+	}
+}
+
+/* Bump live descendants whose priority value is below @cgrp's up to @cgrp's. */
+static void cgroup_priority_propagate(struct cgroup *cgrp)
+{
+	struct cgroup *dsct;
+	struct cgroup_subsys_state *d_css;
+	u16 priority = cgrp->priority;
+
+	lockdep_assert_held(&cgroup_mutex);
+	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
+		if (dsct->priority < priority)
+			cgroup_set_priority(dsct, priority);
+	}
+}
+
+/* Write handler: validate the value, then apply it and propagate it downwards. */
+static ssize_t cgroup_priority_write(struct kernfs_open_file *of,
+				     char *buf, size_t nbytes, loff_t off)
+{
+	struct cgroup *cgrp, *parent;
+	ssize_t ret;
+	u16 prio;
+
+	buf = strstrip(buf);
+	ret = kstrtou16(buf, 0, &prio);
+	if (ret)
+		return ret;
+
+	if (prio >= CGROUP_PRIORITY_MAX)
+		return -ERANGE;
+
+	cgrp = cgroup_kn_lock_live(of->kn, false);
+	if (!cgrp)
+		return -ENOENT;
+	parent = cgroup_parent(cgrp);
+	/* A lower value is a higher priority; a child may not outrank its parent. */
+	if (parent && prio < parent->priority) {
+		ret = -EINVAL;
+		goto unlock_out;
+	}
+	if (prio == cgrp->priority)
+		goto unlock_out;
+
+	cgroup_set_priority(cgrp, prio);
+	cgroup_priority_propagate(cgrp);
+unlock_out:
+	cgroup_kn_unlock(of->kn);
+	return ret ?: nbytes;
+}
+
+/* Return the priority of @css's cgroup, or 0 when @css has no cgroup. */
+ssize_t cgroup_priority(struct cgroup_subsys_state *css)
+{
+	struct cgroup *cgrp = css->cgroup;
+
+	return cgrp ? cgrp->priority : 0;
+}
+EXPORT_SYMBOL(cgroup_priority);
+
 /* cgroup core interface files for the default hierarchy */
 static struct cftype cgroup_base_files[] = {
 	{
@@ -4836,6 +4919,12 @@ static struct cftype cgroup_base_files[] = {
 		.seq_show = cgroup_max_depth_show,
 		.write = cgroup_max_depth_write,
 	},
+	{
+		.name = "cgroup.priority",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = cgroup_priority_show,
+		.write = cgroup_priority_write,
+	},
 	{
 		.name = "cgroup.stat",
 		.seq_show = cgroup_stat_show,
@@ -5178,6 +5267,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
 	cgrp->self.parent = &parent->self;
 	cgrp->root = root;
 	cgrp->level = level;
+	cgrp->priority = parent->priority;
 
 	ret = psi_cgroup_alloc(cgrp);
 	if (ret)
-- 
2.28.0