This is a note to let you know that I've just added the patch titled perf: Disallow mis-matched inherited group reads to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: perf-disallow-mis-matched-inherited-group-reads.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. >From 32671e3799ca2e4590773fd0e63aaa4229e50c06 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Date: Wed, 18 Oct 2023 13:56:54 +0200 Subject: perf: Disallow mis-matched inherited group reads From: Peter Zijlstra <peterz@xxxxxxxxxxxxx> commit 32671e3799ca2e4590773fd0e63aaa4229e50c06 upstream. Because group consistency is non-atomic between parent (filedesc) and children (inherited) events, it is possible for PERF_FORMAT_GROUP read() to try and sum non-matching counter groups -- with non-sensical results. Add group_generation to distinguish the case where a parent group removes and adds an event and thus has the same number, but a different configuration of events as inherited groups. This became a problem when commit fa8c269353d5 ("perf/core: Invert perf_read_group() loops") flipped the order of child_list and sibling_list. Previously it would iterate the group (sibling_list) first, and for each sibling traverse the child_list. In this order, only the group composition of the parent is relevant. By flipping the order the group composition of the child (inherited) events becomes an issue and the mis-match in group composition becomes evident. That said; even prior to this commit, while reading of a group that is not equally inherited was not broken, it still made no sense. (Ab)use ECHILD as error return to indicate issues with child process group composition. Fixes: fa8c269353d5 ("perf/core: Invert perf_read_group() loops") Reported-by: Budimir Markovic <markovicbudimir@xxxxxxxxx> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Link: https://lkml.kernel.org/r/20231018115654.GK33217@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- include/linux/perf_event.h | 1 + kernel/events/core.c | 39 +++++++++++++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 6 deletions(-) --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -579,6 +579,7 @@ struct perf_event { /* The cumulative AND of all event_caps for events in this group. */ int group_caps; + unsigned int group_generation; struct perf_event *group_leader; struct pmu *pmu; void *pmu_private; --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1699,6 +1699,7 @@ static void perf_group_attach(struct per list_add_tail(&event->group_entry, &group_leader->sibling_list); group_leader->nr_siblings++; + group_leader->group_generation++; perf_event__header_size(group_leader); @@ -1771,6 +1772,7 @@ static void perf_group_detach(struct per if (event->group_leader != event) { list_del_init(&event->group_entry); event->group_leader->nr_siblings--; + event->group_leader->group_generation++; goto out; } @@ -4483,7 +4485,7 @@ static int __perf_read_group_add(struct u64 read_format, u64 *values) { struct perf_event_context *ctx = leader->ctx; - struct perf_event *sub; + struct perf_event *sub, *parent; unsigned long flags; int n = 1; /* skip @nr */ int ret; @@ -4493,6 +4495,33 @@ static int __perf_read_group_add(struct return ret; raw_spin_lock_irqsave(&ctx->lock, flags); + /* + * Verify the grouping between the parent and child (inherited) + * events is still in tact. + * + * Specifically: + * - leader->ctx->lock pins leader->sibling_list + * - parent->child_mutex pins parent->child_list + * - parent->ctx->mutex pins parent->sibling_list + * + * Because parent->ctx != leader->ctx (and child_list nests inside + * ctx->mutex), group destruction is not atomic between children, also + * see perf_event_release_kernel(). Additionally, parent can grow the + * group. + * + * Therefore it is possible to have parent and child groups in a + * different configuration and summing over such a beast makes no sense + * what so ever. + * + * Reject this. + */ + parent = leader->parent; + if (parent && + (parent->group_generation != leader->group_generation || + parent->nr_siblings != leader->nr_siblings)) { + ret = -ECHILD; + goto unlock; + } /* * Since we co-schedule groups, {enabled,running} times of siblings @@ -4522,8 +4551,9 @@ static int __perf_read_group_add(struct values[n++] = primary_event_id(sub); } +unlock: raw_spin_unlock_irqrestore(&ctx->lock, flags); - return 0; + return ret; } static int perf_read_group(struct perf_event *event, @@ -4542,10 +4572,6 @@ static int perf_read_group(struct perf_e values[0] = 1 + leader->nr_siblings; - /* - * By locking the child_mutex of the leader we effectively - * lock the child list of all siblings.. XXX explain how. - */ mutex_lock(&leader->child_mutex); ret = __perf_read_group_add(leader, read_format, values); @@ -11033,6 +11059,7 @@ static int inherit_group(struct perf_eve if (IS_ERR(child_ctr)) return PTR_ERR(child_ctr); } + leader->group_generation = parent_event->group_generation; return 0; } Patches currently in stable-queue which might be from peterz@xxxxxxxxxxxxx are queue-4.14/perf-disallow-mis-matched-inherited-group-reads.patch