The /proc/cgroups documentation says that the num_cgroups value is
"the number of control groups in this hierarchy using this controller."

The value printed is simply the total number of cgroups in the hierarchy,
which is correct for v1, but not for the shared v2 hierarchy.

Consider:

controllers="cpuset cpu io memory hugetlb pids rdma misc"
for c in $controllers
do
  echo +$c > /sys/fs/cgroup/cgroup.subtree_control
  mkdir /sys/fs/cgroup/$c
  echo +$c > /sys/fs/cgroup/$c/cgroup.subtree_control
  for i in `seq 100`; do mkdir /sys/fs/cgroup/$c/$i; done
done
cat /proc/cgroups

#subsys_name	hierarchy	num_cgroups	enabled
cpuset	0	809	1
cpu	0	809	1
cpuacct	0	809	1
blkio	0	809	1
memory	0	809	1
devices	0	809	1
freezer	0	809	1
net_cls	0	809	1
perf_event	0	809	1
net_prio	0	809	1
hugetlb	0	809	1
pids	0	809	1
rdma	0	809	1
misc	0	809	1
debug	0	809	1

A count of 809 is reported for each controller, but only 109 should be
reported for most of them since each controller is enabled in only part
of the hierarchy. (Note that io depends on memcg, so its count should be
209.)

The number of cgroups using a controller is an important metric since
kernel memory is used for each cgroup, and some kernel operations scale
with the number of cgroups for some controllers (memory, io). So users
have an interest in minimizing/tracking the number of them.

- - - - - - - - - -

Why is this functional patch currently an RFC:

The point at which the new counters are incremented/decremented for most
enumerated v2 controllers works fine. However, for some controllers (the
v2 documentation calls them "utility controllers") online_css and
kill_css are never called: cpuacct, devices, freezer, net_cls, net_prio,
debug.

To deal with num_cgroups being reported as 1 for those utility controllers regardless of the number of cgroups that exist and support their use, I added is_v2_utility_controller which checks if a controller is among a hardcoded list instead of looking at some property of the cgroup_subsys since I don't think any such property currently exists. It'd be easy to miss adding a new utility controller to this list, so I am interested in hearing if folks have other ideas. I checked if I could use the presence of online_css in cgroup_subsys, but that only works for cpuacct and debug. --- include/linux/cgroup-defs.h | 6 ++++++ kernel/cgroup/cgroup-v1.c | 36 ++++++++++++++++++++++++++++++++++-- kernel/cgroup/cgroup.c | 4 ++++ 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ea48c861cd36..400311222337 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -579,6 +579,12 @@ struct cgroup_root { /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ atomic_t nr_cgrps; + /* + * Number of cgroups using each controller. Includes online and zombies. + * Used only for v2 controllers in /proc/cgroups. 
+ */ + atomic_t nr_css[CGROUP_SUBSYS_COUNT]; + /* Hierarchy-specific flags */ unsigned int flags; diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 520a11cb12f4..8146bcc31421 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -663,6 +663,30 @@ struct cftype cgroup1_base_files[] = { { } /* terminate */ }; +static bool is_v2_utility_controller(int ssid) +{ + return +#ifdef CONFIG_CGROUP_CPUACCT + ssid == cpuacct_cgrp_id || +#endif +#ifdef CONFIG_CGROUP_DEVICE + ssid == devices_cgrp_id || +#endif +#ifdef CONFIG_CGROUP_FREEZER + ssid == freezer_cgrp_id || +#endif +#ifdef CONFIG_NET_CLS_CGROUP + ssid == net_cls_cgrp_id || +#endif +#ifdef CONFIG_CGROUP_NET_PRIO + ssid == net_prio_cgrp_id || +#endif +#ifdef CONFIG_CGROUP_DEBUG + ssid == debug_cgrp_id || +#endif + false; +} + /* Display information about each subsystem and each hierarchy */ int proc_cgroupstats_show(struct seq_file *m, void *v) { @@ -675,11 +699,19 @@ int proc_cgroupstats_show(struct seq_file *m, void *v) * cgroup_mutex contention. 
*/ - for_each_subsys(ss, i) + for_each_subsys(ss, i) { + int count; + + if (!cgroup_on_dfl(&ss->root->cgrp) || is_v2_utility_controller(i)) + count = atomic_read(&ss->root->nr_cgrps); + else + count = atomic_read(&ss->root->nr_css[i]); + seq_printf(m, "%s\t%d\t%d\t%d\n", ss->legacy_name, ss->root->hierarchy_id, - atomic_read(&ss->root->nr_cgrps), + count, cgroup_ssid_enabled(i)); + } return 0; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index a66c088c851c..f25d0e77ae8a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2047,6 +2047,8 @@ void init_cgroup_root(struct cgroup_fs_context *ctx) INIT_LIST_HEAD_RCU(&root->root_list); atomic_set(&root->nr_cgrps, 1); + for (int i = 0; i < CGROUP_SUBSYS_COUNT; ++i) + atomic_set(&root->nr_css[i], 0); cgrp->root = root; init_cgroup_housekeeping(cgrp); @@ -5362,6 +5364,7 @@ static void css_free_rwork_fn(struct work_struct *work) ss->css_free(css); cgroup_idr_remove(&ss->css_idr, id); cgroup_put(cgrp); + atomic_dec(&ss->root->nr_css[ss->id]); if (parent) css_put(parent); @@ -5503,6 +5506,7 @@ static int online_css(struct cgroup_subsys_state *css) atomic_inc(&css->online_cnt); if (css->parent) atomic_inc(&css->parent->online_cnt); + atomic_inc(&ss->root->nr_css[ss->id]); } return ret; } -- 2.45.0.rc1.225.g2a3ae87e7f-goog