The following commit has been merged into the perf/core branch of tip:

Commit-ID:     4eabf533fb1886089ef57e0c8ec52048b1741e39
Gitweb:        https://git.kernel.org/tip/4eabf533fb1886089ef57e0c8ec52048b1741e39
Author:        Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate:    Mon, 04 Nov 2024 14:39:20 +01:00
Committer:     Ingo Molnar <mingo@xxxxxxxxxx>
CommitterDate: Tue, 04 Mar 2025 09:43:22 +01:00

perf/core: Detach 'struct perf_cpu_pmu_context' and 'struct pmu' lifetimes

In preparation for being able to unregister a PMU with existing events,
it becomes important to detach the lifetime of struct perf_cpu_pmu_context
from that of struct pmu.

Notably, struct perf_cpu_pmu_context embeds a struct perf_event_pmu_context
that can stay referenced until the last event goes away.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Reviewed-by: Ravi Bangoria <ravi.bangoria@xxxxxxx>
Link: https://lore.kernel.org/r/20241104135518.760214287@xxxxxxxxxxxxx
---
 include/linux/perf_event.h |  4 +--
 kernel/events/core.c       | 56 +++++++++++++++++++++++++++++++------
 2 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5f293e6..76f4265 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -343,7 +343,7 @@ struct pmu {
 	 */
 	unsigned int			scope;
 
-	struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
+	struct perf_cpu_pmu_context __percpu **cpu_pmu_context;
 	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
 	int				task_ctx_nr;
 	int				hrtimer_interval_ms;
@@ -922,7 +922,7 @@ struct perf_event_pmu_context {
 	struct list_head		pinned_active;
 	struct list_head		flexible_active;
 
-	/* Used to avoid freeing per-cpu perf_event_pmu_context */
+	/* Used to identify the per-cpu perf_event_pmu_context */
 	unsigned int			embedded : 1;
 
 	unsigned int			nr_events;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 773875a..8b2a8c3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1219,7 +1219,7 @@ static int perf_mux_hrtimer_restart_ipi(void *arg)
 
 static __always_inline struct perf_cpu_pmu_context *this_cpc(struct pmu *pmu)
 {
-	return this_cpu_ptr(pmu->cpu_pmu_context);
+	return *this_cpu_ptr(pmu->cpu_pmu_context);
 }
 
 void perf_pmu_disable(struct pmu *pmu)
@@ -5007,11 +5007,14 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
 		 */
 		struct perf_cpu_pmu_context *cpc;
 
-		cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
+		cpc = *per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
 		epc = &cpc->epc;
 		raw_spin_lock_irq(&ctx->lock);
 		if (!epc->ctx) {
-			atomic_set(&epc->refcount, 1);
+			/*
+			 * One extra reference for the pmu; see perf_pmu_free().
+			 */
+			atomic_set(&epc->refcount, 2);
 			epc->embedded = 1;
 			list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
 			epc->ctx = ctx;
@@ -5087,6 +5090,15 @@ static void get_pmu_ctx(struct perf_event_pmu_context *epc)
 	WARN_ON_ONCE(!atomic_inc_not_zero(&epc->refcount));
 }
 
+static void free_cpc_rcu(struct rcu_head *head)
+{
+	struct perf_cpu_pmu_context *cpc =
+		container_of(head, typeof(*cpc), epc.rcu_head);
+
+	kfree(cpc->epc.task_ctx_data);
+	kfree(cpc);
+}
+
 static void free_epc_rcu(struct rcu_head *head)
 {
 	struct perf_event_pmu_context *epc = container_of(head, typeof(*epc), rcu_head);
@@ -5121,8 +5133,10 @@ static void put_pmu_ctx(struct perf_event_pmu_context *epc)
 
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
 
-	if (epc->embedded)
+	if (epc->embedded) {
+		call_rcu(&epc->rcu_head, free_cpc_rcu);
 		return;
+	}
 
 	call_rcu(&epc->rcu_head, free_epc_rcu);
 }
@@ -11752,7 +11766,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
 	cpus_read_lock();
 	for_each_online_cpu(cpu) {
 		struct perf_cpu_pmu_context *cpc;
-		cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu);
+		cpc = *per_cpu_ptr(pmu->cpu_pmu_context, cpu);
 		cpc->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
 
 		cpu_function_call(cpu, perf_mux_hrtimer_restart_ipi, cpc);
@@ -11925,7 +11939,25 @@ static void perf_pmu_free(struct pmu *pmu)
 		device_del(pmu->dev);
 		put_device(pmu->dev);
 	}
-	free_percpu(pmu->cpu_pmu_context);
+
+	if (pmu->cpu_pmu_context) {
+		int cpu;
+
+		for_each_possible_cpu(cpu) {
+			struct perf_cpu_pmu_context *cpc;
+
+			cpc = *per_cpu_ptr(pmu->cpu_pmu_context, cpu);
+			if (!cpc)
+				continue;
+			if (cpc->epc.embedded) {
+				/* refcount managed */
+				put_pmu_ctx(&cpc->epc);
+				continue;
+			}
+			kfree(cpc);
+		}
+		free_percpu(pmu->cpu_pmu_context);
+	}
 }
 
 DEFINE_FREE(pmu_unregister, struct pmu *, if (_T) perf_pmu_free(_T))
@@ -11964,14 +11996,20 @@ int perf_pmu_register(struct pmu *_pmu, const char *name, int type)
 		return ret;
 	}
 
-	pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context);
+	pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context *);
 	if (!pmu->cpu_pmu_context)
 		return -ENOMEM;
 
 	for_each_possible_cpu(cpu) {
-		struct perf_cpu_pmu_context *cpc;
+		struct perf_cpu_pmu_context *cpc =
+			kmalloc_node(sizeof(struct perf_cpu_pmu_context),
+				     GFP_KERNEL | __GFP_ZERO,
+				     cpu_to_node(cpu));
+
+		if (!cpc)
+			return -ENOMEM;
 
-		cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu);
+		*per_cpu_ptr(pmu->cpu_pmu_context, cpu) = cpc;
 		__perf_init_event_pmu_context(&cpc->epc, pmu);
 		__perf_mux_hrtimer_init(cpc, cpu);
 	}
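
For reference, the net effect of the refcount changes is that an embedded
perf_event_pmu_context now starts life with two references: one held by the
first event that attaches to it, and one extra held by the pmu itself, which
perf_pmu_free() drops via put_pmu_ctx(). A minimal standalone userspace
sketch of that two-owner pattern follows (not kernel code; fake_cpc,
cpc_get_first and cpc_put are made-up names, and a plain free() stands in
for the kernel's call_rcu()-deferred kfree()):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_cpc {
	atomic_int refcount;
};

static struct fake_cpc *cpc_get_first(void)
{
	struct fake_cpc *cpc = calloc(1, sizeof(*cpc));

	if (!cpc)
		abort();
	/* One reference for the first "event", one extra for the "pmu". */
	atomic_store(&cpc->refcount, 2);
	return cpc;
}

static void cpc_put(struct fake_cpc *cpc, const char *who)
{
	if (atomic_fetch_sub(&cpc->refcount, 1) == 1) {
		printf("%s dropped the last reference: freeing\n", who);
		free(cpc);
	} else {
		printf("%s dropped its reference: object lives on\n", who);
	}
}

int main(void)
{
	struct fake_cpc *cpc = cpc_get_first();

	/* Either order works; the object survives until both owners are gone. */
	cpc_put(cpc, "pmu (perf_pmu_free analogue)");
	cpc_put(cpc, "last event (put_pmu_ctx analogue)");
	return 0;
}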
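The allocation-shape side of the patch can be pictured the same way:
pmu->cpu_pmu_context is now a per-CPU array of pointers and each
perf_cpu_pmu_context is allocated separately, so freeing the pointer array
no longer frees the contexts themselves. A rough userspace analogue
(again not kernel code; nr_cpus and struct ctx are illustrative stand-ins):

#include <stdio.h>
#include <stdlib.h>

struct ctx {
	int cpu;
};

int main(void)
{
	int nr_cpus = 4;

	/* Rough analogue of alloc_percpu(struct perf_cpu_pmu_context *). */
	struct ctx **per_cpu = calloc(nr_cpus, sizeof(*per_cpu));

	if (!per_cpu)
		abort();

	/* Rough analogue of the kmalloc_node() loop in perf_pmu_register(). */
	for (int cpu = 0; cpu < nr_cpus; cpu++) {
		per_cpu[cpu] = calloc(1, sizeof(struct ctx));
		if (!per_cpu[cpu])
			abort();
		per_cpu[cpu]->cpu = cpu;
	}

	/*
	 * Teardown can release the pointer array while one context is still
	 * held elsewhere (in the kernel: pinned by its reference count).
	 */
	struct ctx *survivor = per_cpu[2];

	for (int cpu = 0; cpu < nr_cpus; cpu++) {
		if (cpu != 2)
			free(per_cpu[cpu]);
	}
	free(per_cpu);

	printf("context for cpu %d outlived the per-cpu array\n", survivor->cpu);
	free(survivor);
	return 0;
}

In the kernel, the "survivor" case corresponds to an embedded context that
is still pinned by an event's reference when perf_pmu_free() runs; it is
released later from put_pmu_ctx() via free_cpc_rcu().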