On Mon, Aug 29, 2016 at 03:02:13PM +0200, Peter Zijlstra wrote: > On Mon, Aug 29, 2016 at 12:03:09PM +0200, Peter Zijlstra wrote: > > @@ -1802,8 +1802,18 @@ event_sched_out(struct perf_event *event, > > > > event->tstamp_stopped = tstamp; > > event->pmu->del(event, 0); > > - event->oncpu = -1; > > - event->state = PERF_EVENT_STATE_INACTIVE; > > + > > + WRITE_ONCE(event->state, PERF_EVENT_STATE_INACTIVE); > > + /* > > + * pmu::del() will have updated the event count. Now mark it inactive, > > + * but take care to clear ->oncpu after the INACTIVE store, such that > > + * while ->state == ACTIVE, ->oncpu must be valid. > > + * > > + * See event_sched_in(), perf_event_restart() and perf_event_read(). > > + */ > > + smp_wmb(); > > + WRITE_ONCE(event->oncpu, -1); > > + > > if (event->pending_disable) { > > event->pending_disable = 0; > > event->state = PERF_EVENT_STATE_OFF; > > @@ -2015,8 +2025,10 @@ event_sched_in(struct perf_event *event, > > > > WRITE_ONCE(event->oncpu, smp_processor_id()); > > /* > > - * Order event::oncpu write to happen before the ACTIVE state > > - * is visible. > > + * Order event::oncpu write to happen before the ACTIVE state is > > + * visible, such that when we observe ACTIVE, oncpu must be correct. > > + * > > + * Matches the smp_rmb() in perf_event_restart(). > > */ > > smp_wmb(); > > WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE); > > Urgh.. that cannot work either, because now perf_event_read() can race > against event_sched_in(). Since that's no longer crossed. > > > @@ -3561,28 +3576,36 @@ u64 perf_event_read_local(struct perf_event *event) > > > > static int perf_event_read(struct perf_event *event, bool group) > > { > > - int ret = 0, cpu_to_read, local_cpu; > > + int ret = 0, cpu_to_read, local_cpu, state; > > + > > + local_cpu = get_cpu(); /* disable preemption to hold off hotplut */ > > + cpu_to_read = READ_ONCE(event->oncpu); > > + /* > > + * Matches smp_wmb() from event_sched_out(), ->oncpu must be valid > > + * IFF we observe ACTIVE. > > + */ > > + smp_rmb(); > > + state = READ_ONCE(event->state); > > The best I can come up with is something like: > > > do { > state = READ_ONCE(event->state); > if (state != ACTIVE) > break; > smp_rmb(); > cpu = READ_ONCE(event->cpu); > smp_rmb(); > } while (READ_ONCE(event->state) != state); couldn't we just call smp_call_function_single(cpu_to_read, __perf_event_read, ... once we read oncpu != -1 ? __perf_event_read then checks safely if event is active jirka --- diff --git a/kernel/events/core.c b/kernel/events/core.c index eed96b85503f..b99b791c16bb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3421,6 +3421,7 @@ out: struct perf_read_data { struct perf_event *event; bool group; + bool inactive; int ret; }; @@ -3562,22 +3563,15 @@ u64 perf_event_read_local(struct perf_event *event) static int perf_event_read(struct perf_event *event, bool group) { int ret = 0, cpu_to_read, local_cpu; - - /* - * If event is enabled and currently active on a CPU, update the - * value in the event structure: - */ - if (event->state == PERF_EVENT_STATE_ACTIVE) { + local_cpu = get_cpu(); + cpu_to_read = find_cpu_to_read(event, local_cpu); + if (cpu_to_read != -1) { struct perf_read_data data = { .event = event, .group = group, .ret = 0, }; - local_cpu = get_cpu(); - cpu_to_read = find_cpu_to_read(event, local_cpu); - put_cpu(); - ret = smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1); /* The event must have been read from an online CPU: */ WARN_ON_ONCE(ret); @@ -3603,6 +3597,7 @@ static int perf_event_read(struct perf_event *event, bool group) raw_spin_unlock_irqrestore(&ctx->lock, flags); } + put_cpu(); return ret; } -- To unsubscribe from this list: send the line "unsubscribe linux-tip-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html