On Mon, Mar 22, 2021 at 02:24:40PM +0100, Marco Elver wrote:
> To make compatible with more recent libc, we'll need to fixup the tests
> with the below.

OK, that reproduced things here, thanks!

The below seems to not explode instantly.... it still has the
alternative version in as well (and I think it might even work too, but
the one I left in seems simpler).

---
 kernel/events/core.c | 154 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 111 insertions(+), 43 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index a7220e8c447e..8c0f905cc017 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2167,8 +2172,9 @@ static void perf_group_detach(struct perf_event *event)
 	 * If this is a sibling, remove it from its group.
 	 */
 	if (leader != event) {
+		leader->nr_siblings--;
 		list_del_init(&event->sibling_list);
-		event->group_leader->nr_siblings--;
+		event->group_leader = event;
 		goto out;
 	}
 
@@ -2182,8 +2188,9 @@ static void perf_group_detach(struct perf_event *event)
 		if (sibling->event_caps & PERF_EV_CAP_SIBLING)
 			perf_remove_sibling_event(sibling);
 
-		sibling->group_leader = sibling;
+		leader->nr_siblings--;
 		list_del_init(&sibling->sibling_list);
+		sibling->group_leader = sibling;
 
 		/* Inherit group flags from the previous leader */
 		sibling->group_caps = event->group_caps;
@@ -2360,10 +2367,19 @@ __perf_remove_from_context(struct perf_event *event,
 static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
 {
 	struct perf_event_context *ctx = event->ctx;
+	bool remove;
 
 	lockdep_assert_held(&ctx->mutex);
 
-	event_function_call(event, __perf_remove_from_context, (void *)flags);
+	/*
+	 * There is concurrency vs remove_on_exec().
+	 */
+	raw_spin_lock_irq(&ctx->lock);
+	remove = (event->attach_state & PERF_ATTACH_CONTEXT);
+	raw_spin_unlock_irq(&ctx->lock);
+
+	if (remove)
+		event_function_call(event, __perf_remove_from_context, (void *)flags);
 
 	/*
 	 * The above event_function_call() can NO-OP when it hits
@@ -4232,41 +4248,92 @@ static void perf_event_enable_on_exec(int ctxn)
 static void perf_remove_from_owner(struct perf_event *event);
 static void perf_event_exit_event(struct perf_event *child_event,
 				  struct perf_event_context *child_ctx,
-				  struct task_struct *child);
+				  struct task_struct *child,
+				  bool removed);
 
 /*
  * Removes all events from the current task that have been marked
  * remove-on-exec, and feeds their values back to parent events.
  */
-static void perf_event_remove_on_exec(void)
+static void perf_event_remove_on_exec(int ctxn)
 {
-	int ctxn;
+	struct perf_event_context *ctx, *clone_ctx = NULL;
+	struct perf_event *event, *next;
+	LIST_HEAD(free_list);
+	unsigned long flags;
+	bool modified = false;
 
-	for_each_task_context_nr(ctxn) {
-		struct perf_event_context *ctx;
-		struct perf_event *event, *next;
+	ctx = perf_pin_task_context(current, ctxn);
+	if (!ctx)
+		return;
 
-		ctx = perf_pin_task_context(current, ctxn);
-		if (!ctx)
+	mutex_lock(&ctx->mutex);
+
+	if (WARN_ON_ONCE(ctx->task != current))
+		goto unlock;
+
+	list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) {
+		if (!event->attr.remove_on_exec)
 			continue;
 
-		mutex_lock(&ctx->mutex);
-		list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) {
-			if (!event->attr.remove_on_exec)
-				continue;
+		if (!is_kernel_event(event))
+			perf_remove_from_owner(event);
 
-			if (!is_kernel_event(event))
-				perf_remove_from_owner(event);
-			perf_remove_from_context(event, DETACH_GROUP);
-			/*
-			 * Remove the event and feed back its values to the
-			 * parent event.
-			 */
-			perf_event_exit_event(event, ctx, current);
-		}
-		mutex_unlock(&ctx->mutex);
-		put_ctx(ctx);
+		modified = true;
+
+		perf_remove_from_context(event, !!event->parent * DETACH_GROUP);
+		perf_event_exit_event(event, ctx, current, true);
+	}
+
+	raw_spin_lock_irqsave(&ctx->lock, flags);
+	if (modified)
+		clone_ctx = unclone_ctx(ctx);
+	--ctx->pin_count;
+	raw_spin_unlock_irqrestore(&ctx->lock, flags);
+
+#if 0
+	struct perf_cpu_context *cpuctx;
+
+	if (!modified) {
+		perf_unpin_context(ctx);
+		goto unlock;
+	}
+
+	local_irq_save(flags);
+	cpuctx = __get_cpu_context(ctx);
+	perf_ctx_lock(cpuctx, ctx);
+	task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
+
+	list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) {
+		if (!event->attr.remove_on_exec)
+			continue;
+
+		if (event->parent)
+			perf_group_detach(event);
+		list_del_event(event, ctx);
+
+		list_add(&event->active_list, &free_list);
+	}
+
+	ctx_resched(cpuctx, ctx, EVENT_ALL);
+
+	clone_ctx = unclone_ctx(ctx);
+	--ctx->pin_count;
+	perf_ctx_unlock(cpuctx, ctx);
+	local_irq_restore(flags);
+
+	list_for_each_entry_safe(event, next, &free_list, active_entry) {
+		list_del(&event->active_entry);
+		perf_event_exit_event(event, ctx, current, true);
 	}
+#endif
+
+unlock:
+	mutex_unlock(&ctx->mutex);
+
+	put_ctx(ctx);
+	if (clone_ctx)
+		put_ctx(clone_ctx);
 }
 
 struct perf_read_data {
@@ -7615,20 +7682,18 @@ void perf_event_exec(void)
 	struct perf_event_context *ctx;
 	int ctxn;
 
-	rcu_read_lock();
 	for_each_task_context_nr(ctxn) {
-		ctx = current->perf_event_ctxp[ctxn];
-		if (!ctx)
-			continue;
-
 		perf_event_enable_on_exec(ctxn);
+		perf_event_remove_on_exec(ctxn);
 
-		perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL,
-				 true);
+		rcu_read_lock();
+		ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
+		if (ctx) {
+			perf_iterate_ctx(ctx, perf_event_addr_filters_exec,
+					 NULL, true);
+		}
+		rcu_read_unlock();
 	}
-	rcu_read_unlock();
-
-	perf_event_remove_on_exec();
 }
 
 struct remote_output {
@@ -12509,7 +12574,7 @@ static void sync_child_event(struct perf_event *child_event,
 
 static void
 perf_event_exit_event(struct perf_event *child_event,
 		      struct perf_event_context *child_ctx,
-		      struct task_struct *child)
+		      struct task_struct *child, bool removed)
 {
 	struct perf_event *parent_event = child_event->parent;
@@ -12526,12 +12591,15 @@ perf_event_exit_event(struct perf_event *child_event,
 	 * and being thorough is better.
 	 */
 	raw_spin_lock_irq(&child_ctx->lock);
-	WARN_ON_ONCE(child_ctx->is_active);
+	if (!removed) {
+		WARN_ON_ONCE(child_ctx->is_active);
 
-	if (parent_event)
-		perf_group_detach(child_event);
-	list_del_event(child_event, child_ctx);
-	perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT); /* is_event_hup() */
+		if (parent_event)
+			perf_group_detach(child_event);
+		list_del_event(child_event, child_ctx);
+	}
+	if (child_event->state >= PERF_EVENT_STATE_EXIT)
+		perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT); /* is_event_hup() */
 	raw_spin_unlock_irq(&child_ctx->lock);
 
 	/*
@@ -12617,7 +12685,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	perf_event_task(child, child_ctx, 0);
 
 	list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
-		perf_event_exit_event(child_event, child_ctx, child);
+		perf_event_exit_event(child_event, child_ctx, child, false);
 
 	mutex_unlock(&child_ctx->mutex);
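
For reference, a minimal user-space sketch of the semantics being
exercised here (not part of the patch above; it only assumes the
perf_event_attr::remove_on_exec bit from Marco's series and a libc with
new enough headers): an event opened with remove_on_exec set is detached
from the task when it calls exec.

/*
 * Illustrative only -- not from the patch above. Assumes a kernel and
 * headers that already carry the remove_on_exec bit in
 * struct perf_event_attr (added by this series).
 */
#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.size		= sizeof(attr),
		.config		= PERF_COUNT_HW_INSTRUCTIONS,
		.remove_on_exec	= 1,	/* detach the event at execve() */
	};
	/* pid == 0: measure the current task; cpu == -1: on any CPU */
	int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);

	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	/*
	 * Across execve() the kernel removes the event from the task's
	 * context (feeding its value back to a parent event, if any),
	 * so the new image runs unmonitored.
	 */
	execlp("true", "true", (char *)NULL);
	perror("execlp");
	return 1;
}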