pidlists are a structure that allows printing cgroup member tasks in
sorted order and with proper pidns (in)visibility. The promise of
sorted output was removed in commit 7823da36ce8e ("cgroups: update
documentation of cgroups tasks and procs files") (more than 13 years
ago at the time of writing this).

On systems that still use v1 hierarchies (e.g. systemd in non-unified
mode), pidlists are problematic because:
a) their cache unnecessarily busies workqueues (cgroup_pidlist_destroy_wq),
b) PID recycling [1] may lead to logging noise:
> seq_file: buggy .next function kernfs_seq_next did not update position index

It is possible to reuse the cgroup v2 code that relies directly on the
css_set iterator without caching (effectively extracting the
css_task_iter_* calls from pidlist_array_load()). We only need to make
a provision for pidns by skipping external tasks (instead of printing
'0' as in v2).

[1] cgroup v1 code uses the PID as the iterator position; PID recycling
causes a repetition of the same PID at the end of the (`tasks`) pidlist
and seq_file interprets this as a non-incremented index. (The seq_file
code corrects it with PID++, which should be harmless unless the tasks
file is read byte by byte.)

Signed-off-by: Michal Koutný <mkoutny@xxxxxxxx>
---
 kernel/cgroup/cgroup-internal.h |  5 +++++
 kernel/cgroup/cgroup-v1.c       | 32 ++++++++++++++++++++++++--------
 kernel/cgroup/cgroup.c          |  8 ++++----
 3 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index c56071f150f2..8edf7aeac159 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -264,6 +264,11 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
 int __cgroup_task_count(const struct cgroup *cgrp);
 int cgroup_task_count(const struct cgroup *cgrp);
 
+void cgroup_procs_release(struct kernfs_open_file *of);
+void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos);
+void *cgroup_procs_start(struct seq_file *s, loff_t *pos);
+void *cgroup_threads_start(struct seq_file *s, loff_t *pos);
+
 /*
  * rstat.c
  */
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 83044312bc41..7c0945ccba0d 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -486,6 +486,22 @@ static int cgroup_pidlist_show(struct seq_file *s, void *v)
 	return 0;
 }
 
+static int cgroup1_procs_show(struct seq_file *s, void *v)
+{
+	pid_t pid;
+
+	/* Print PID both for `tasks` file (threads) and `cgroup.procs`
+	 * (processes), the latter iterates with CSS_TASK_ITER_PROCS hence we
+	 * get PIDs of thread group leaders, i.e. tgids.
+	 */
+	pid = task_pid_vnr(v);
+	if (!pid)
+		return SEQ_SKIP;
+
+	seq_printf(s, "%d\n", pid);
+	return 0;
+}
+
 static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
 				     char *buf, size_t nbytes, loff_t off,
 				     bool threadgroup)
@@ -623,11 +639,11 @@ static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
 struct cftype cgroup1_base_files[] = {
 	{
 		.name = "cgroup.procs",
-		.seq_start = cgroup_pidlist_start,
-		.seq_next = cgroup_pidlist_next,
-		.seq_stop = cgroup_pidlist_stop,
-		.seq_show = cgroup_pidlist_show,
 		.private = CGROUP_FILE_PROCS,
+		.release = cgroup_procs_release,
+		.seq_start = cgroup_procs_start,
+		.seq_next = cgroup_procs_next,
+		.seq_show = cgroup1_procs_show,
 		.write = cgroup1_procs_write,
 	},
 	{
@@ -642,11 +658,11 @@ struct cftype cgroup1_base_files[] = {
 	},
 	{
 		.name = "tasks",
-		.seq_start = cgroup_pidlist_start,
-		.seq_next = cgroup_pidlist_next,
-		.seq_stop = cgroup_pidlist_stop,
-		.seq_show = cgroup_pidlist_show,
 		.private = CGROUP_FILE_TASKS,
+		.release = cgroup_procs_release,
+		.seq_start = cgroup_threads_start,
+		.seq_next = cgroup_procs_next,
+		.seq_show = cgroup1_procs_show,
 		.write = cgroup1_tasks_write,
 	},
 	{
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index f55a40db065f..3c5ba2ca7852 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4963,7 +4963,7 @@ void css_task_iter_end(struct css_task_iter *it)
 		put_task_struct(it->cur_task);
 }
 
-static void cgroup_procs_release(struct kernfs_open_file *of)
+void cgroup_procs_release(struct kernfs_open_file *of)
 {
 	struct cgroup_file_ctx *ctx = of->priv;
 
@@ -4971,7 +4971,7 @@ static void cgroup_procs_release(struct kernfs_open_file *of)
 		css_task_iter_end(&ctx->procs.iter);
 }
 
-static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
+void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
 {
 	struct kernfs_open_file *of = s->private;
 	struct cgroup_file_ctx *ctx = of->priv;
@@ -5008,7 +5008,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
 	return cgroup_procs_next(s, NULL, NULL);
 }
 
-static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
+void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
 {
 	struct cgroup *cgrp = seq_css(s)->cgroup;
 
@@ -5152,7 +5152,7 @@ static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
 	return __cgroup_procs_write(of, buf, true) ?: nbytes;
 }
 
-static void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
+void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
 {
 	return __cgroup_procs_start(s, pos, 0);
 }
-- 
2.41.0
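
Not part of the patch, just an illustration: a minimal user-space sketch of how
the pidns behaviour described above could be checked by hand. The tasks path is
a placeholder (point it at a cgroup on a v1 hierarchy on your system), it needs
root for CLONE_NEWPID, and it assumes a downward-growing stack. The child reads
the `tasks` file from a fresh PID namespace; with this change, member tasks that
are not visible in that namespace should be skipped, so no '0' entries and no
foreign PIDs should appear in the output.

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

/* Placeholder path; adjust to an existing cgroup on a v1 hierarchy. */
#define TASKS_FILE "/sys/fs/cgroup/pids/tasks"

static int dump_tasks(void *arg)
{
	char line[64];
	FILE *f = fopen(TASKS_FILE, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Only PIDs visible in this (new) namespace should be listed;
	 * external member tasks are skipped rather than shown as "0". */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}

int main(void)
{
	static char stack[64 * 1024];
	/* CLONE_NEWPID: run the reader as PID 1 of a new PID namespace. */
	pid_t child = clone(dump_tasks, stack + sizeof(stack),
			    CLONE_NEWPID | SIGCHLD, NULL);

	if (child < 0) {
		perror("clone");
		return 1;
	}
	waitpid(child, NULL, 0);
	return 0;
}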