[PATCH v5 2/5] cgroup/pids: Make event counters hierarchical

Michal Koutný <mkoutny@xxxxxxxx> · Tue, 21 May 2024 11:21:27 +0200

The pids.events file should honor the hierarchy, so make the events
propagate from their origin up to the root on the unified hierarchy. The
legacy behavior remains non-hierarchical.

Signed-off-by: Michal Koutný <mkoutny@xxxxxxxx>
---
 Documentation/admin-guide/cgroup-v2.rst |  9 +++--
 kernel/cgroup/pids.c                    | 46 ++++++++++++++++---------
 2 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 945ff743a3c9..0b5f77104e8b 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -240,8 +240,11 @@ cgroup v2 currently supports the following mount options.
           v2 is remounted later on).
 
   pids_localevents
-        Represent fork failures inside cgroup's pids.events:max (v1 behavior),
-        not its limit being hit (v2 behavior).
+        The option restores v1-like behavior of pids.events:max, that is only
+        local (inside cgroup proper) fork failures are counted. Without this
+        option pids.events.max represents any pids.max enforcemnt across
+        cgroup's subtree.
+
 
 
 Organizing Processes and Threads
@@ -2205,7 +2208,7 @@ PID Interface Files
 	modified event. The following entries are defined.
 
 	  max
-		The number of times the cgroup's number of processes hit the
+		The number of times the cgroup's total number of processes hit the pids.max
 		limit (see also pids_localevents).
 
 Organisational operations are not blocked by cgroup policies, so it is
diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c
index a557f5c8300b..c09b744d548c 100644
--- a/kernel/cgroup/pids.c
+++ b/kernel/cgroup/pids.c
@@ -238,6 +238,34 @@ static void pids_cancel_attach(struct cgroup_taskset *tset)
 	}
 }
 
+static void pids_event(struct pids_cgroup *pids_forking,
+		       struct pids_cgroup *pids_over_limit)
+{
+	struct pids_cgroup *p = pids_forking;
+	bool limit = false;
+
+	for (; parent_pids(p); p = parent_pids(p)) {
+		/* Only log the first time limit is hit. */
+		if (atomic64_inc_return(&p->events[PIDCG_FORKFAIL]) == 1) {
+			pr_info("cgroup: fork rejected by pids controller in ");
+			pr_cont_cgroup_path(p->css.cgroup);
+			pr_cont("\n");
+		}
+		cgroup_file_notify(&p->events_file);
+
+		if (!cgroup_subsys_on_dfl(pids_cgrp_subsys) ||
+		    cgrp_dfl_root.flags & CGRP_ROOT_PIDS_LOCAL_EVENTS)
+			break;
+
+		if (p == pids_over_limit)
+			limit = true;
+		if (limit)
+			atomic64_inc(&p->events[PIDCG_MAX]);
+
+		cgroup_file_notify(&p->events_file);
+	}
+}
+
 /*
  * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies
  * on cgroup_threadgroup_change_begin() held by the copy_process().
@@ -254,23 +282,9 @@ static int pids_can_fork(struct task_struct *task, struct css_set *cset)
 		css = task_css_check(current, pids_cgrp_id, true);
 	pids = css_pids(css);
 	err = pids_try_charge(pids, 1, &pids_over_limit);
-	if (err) {
-		/* compatibility on v1 where events were notified in leaves. */
-		if (!cgroup_subsys_on_dfl(pids_cgrp_subsys))
-			pids_over_limit = pids;
-
-		/* Only log the first time limit is hit. */
-		if (atomic64_inc_return(&pids->events[PIDCG_FORKFAIL]) == 1) {
-			pr_info("cgroup: fork rejected by pids controller in ");
-			pr_cont_cgroup_path(pids->css.cgroup);
-			pr_cont("\n");
-		}
-		atomic64_inc(&pids_over_limit->events[PIDCG_MAX]);
+	if (err)
+		pids_event(pids, pids_over_limit);
 
-		cgroup_file_notify(&pids->events_file);
-		if (pids_over_limit != pids)
-			cgroup_file_notify(&pids_over_limit->events_file);
-	}
 	return err;
 }
 
-- 
2.44.0