The following commit has been merged into the perf/core branch of tip: Commit-ID: c5d93d23a26012a98b77f9e354ab9b3afd420059 Gitweb: https://git.kernel.org/tip/c5d93d23a26012a98b77f9e354ab9b3afd420059 Author: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> AuthorDate: Thu, 04 Jul 2024 19:03:37 +02:00 Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx> CommitterDate: Tue, 09 Jul 2024 13:26:35 +02:00 perf: Enqueue SIGTRAP always via task_work. A signal is delivered by raising irq_work() which works from any context including NMI. irq_work() can be delayed if the architecture does not provide an interrupt vector. In order not to lose a signal, the signal is injected via task_work during event_sched_out(). Instead going via irq_work, the signal could be added directly via task_work. The signal is sent to current and can be enqueued on its return path to userland. Queue signal via task_work and consider possible NMI context. Remove perf_event::pending_sigtrap and and use perf_event::pending_work instead. Reported-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Tested-by: Marco Elver <elver@xxxxxxxxxx> Tested-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> Link: https://lore.kernel.org/r/20240704170424.1466941-4-bigeasy@xxxxxxxxxxxxx --- include/linux/perf_event.h | 3 +-- kernel/events/core.c | 31 ++++++++++--------------------- 2 files changed, 11 insertions(+), 23 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 393fb13..ea0d824 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -781,7 +781,6 @@ struct perf_event { unsigned int pending_wakeup; unsigned int pending_kill; unsigned int pending_disable; - unsigned int pending_sigtrap; unsigned long pending_addr; /* SIGTRAP */ struct irq_work pending_irq; struct callback_head pending_task; @@ -963,7 +962,7 @@ struct perf_event_context { struct rcu_head rcu_head; /* - * Sum (event->pending_sigtrap + event->pending_work) + * Sum (event->pending_work + event->pending_work) * * The SIGTRAP is targeted at ctx->task, as such it won't do changing * that until the signal is delivered. diff --git a/kernel/events/core.c b/kernel/events/core.c index c54da50..73e1b02 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2283,17 +2283,6 @@ event_sched_out(struct perf_event *event, struct perf_event_context *ctx) state = PERF_EVENT_STATE_OFF; } - if (event->pending_sigtrap) { - event->pending_sigtrap = 0; - if (state != PERF_EVENT_STATE_OFF && - !event->pending_work && - !task_work_add(current, &event->pending_task, TWA_RESUME)) { - event->pending_work = 1; - } else { - local_dec(&event->ctx->nr_pending); - } - } - perf_event_set_state(event, state); if (!is_software_event(event)) @@ -6776,11 +6765,6 @@ static void __perf_pending_irq(struct perf_event *event) * Yay, we hit home and are in the context of the event. */ if (cpu == smp_processor_id()) { - if (event->pending_sigtrap) { - event->pending_sigtrap = 0; - perf_sigtrap(event); - local_dec(&event->ctx->nr_pending); - } if (event->pending_disable) { event->pending_disable = 0; perf_event_disable_local(event); @@ -9721,21 +9705,26 @@ static int __perf_event_overflow(struct perf_event *event, */ bool valid_sample = sample_is_allowed(event, regs); unsigned int pending_id = 1; + enum task_work_notify_mode notify_mode; if (regs) pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1; - if (!event->pending_sigtrap) { - event->pending_sigtrap = pending_id; + + notify_mode = in_nmi() ? TWA_NMI_CURRENT : TWA_RESUME; + + if (!event->pending_work && + !task_work_add(current, &event->pending_task, notify_mode)) { + event->pending_work = pending_id; local_inc(&event->ctx->nr_pending); event->pending_addr = 0; if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR)) event->pending_addr = data->addr; - irq_work_queue(&event->pending_irq); + } else if (event->attr.exclude_kernel && valid_sample) { /* * Should not be able to return to user space without - * consuming pending_sigtrap; with exceptions: + * consuming pending_work; with exceptions: * * 1. Where !exclude_kernel, events can overflow again * in the kernel without returning to user space. @@ -9745,7 +9734,7 @@ static int __perf_event_overflow(struct perf_event *event, * To approximate progress (with false negatives), * check 32-bit hash of the current IP. */ - WARN_ON_ONCE(event->pending_sigtrap != pending_id); + WARN_ON_ONCE(event->pending_work != pending_id); } }