This patch should probably be squashed with Tvrtko's PMU enabling patch... i915 events don't have a CPU context to work with, so the i915 PMU should work in global mode, i.e. expose perf_invalid_context. This will make the following call to perf: perf stat -e i915/rcs0-busy/ workload.sh error out with "failed to read counter". Correct usage would be: perf stat -e i915/rcs0-busy/ -a -C0 workload.sh And we will get the expected output: Another change required to make perf-stat happy is to properly support pmu->del(): comments in the del() declaration say that del() is required to call stop(event, PERF_EF_UPDATE), and perf stat implicitly gets the event count through this. Finally, if perf stat is run as follows: perf stat -e i915/rcs0-busy/ -a workload.sh i.e. without the '-C0' option, then the event will be initialized as many times as there are CPUs. Thus, the patch adds PMU refcounting support to avoid multiple init/close of internal things. The issue which I did not overcome is that in this case the counters will be multiplied by the number of CPUs. Not sure whether it is possible to have some event-enabling CPU mask, or whether I need to simply error out. v1: Make pmu.busy_stats a refcounter to avoid busy stats going away while some deleted event still needs them. 
Change-Id: I7d1abe747a4399196e72253f7b66441a6528dbee Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@xxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_pmu.c | 37 ++++++++++++++++++++------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index bcdf2bc..fc29c75 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -469,11 +469,16 @@ static void i915_pmu_enable(struct perf_event *event) ns_to_ktime(PERIOD), 0, HRTIMER_MODE_REL_PINNED); i915->pmu.timer_enabled = true; - } else if (is_engine_event(event) && engine_needs_busy_stats(engine) && - !engine->pmu.busy_stats) { - engine->pmu.busy_stats = true; - if (!cancel_delayed_work(&engine->pmu.disable_busy_stats)) - queue_work(i915->wq, &engine->pmu.enable_busy_stats); + } else if (is_engine_event(event) && engine_needs_busy_stats(engine)) { + /* Enable busy stats for the first event demanding it, next + * events just reference counter. So, if some events will go + * away, we will still have busy stats enabled till remaining + * events still use them. 
+ */ + if (engine->pmu.busy_stats++ == 0) { + if (!cancel_delayed_work(&engine->pmu.disable_busy_stats)) + queue_work(i915->wq, &engine->pmu.enable_busy_stats); + } } spin_unlock_irqrestore(&i915->pmu.lock, flags); @@ -495,16 +500,16 @@ static void i915_pmu_disable(struct perf_event *event) enum intel_engine_id id; engine = intel_engine_lookup_user(i915, - engine_event_class(event), - engine_event_instance(event)); + engine_event_class(event), + engine_event_instance(event)); GEM_BUG_ON(!engine); engine->pmu.enable &= ~BIT(engine_event_sample(event)); - if (engine->pmu.busy_stats && - !engine_needs_busy_stats(engine)) { - engine->pmu.busy_stats = false; - queue_delayed_work(i915->wq, - &engine->pmu.disable_busy_stats, - round_jiffies_up_relative(2 * HZ)); + if (!engine_needs_busy_stats(engine)) { + if (engine->pmu.busy_stats && --engine->pmu.busy_stats == 0) { + queue_delayed_work(i915->wq, + &engine->pmu.disable_busy_stats, + round_jiffies_up_relative(2 * HZ)); + } } mask = 0; for_each_engine(engine, i915, id) @@ -529,6 +534,8 @@ static void i915_pmu_event_start(struct perf_event *event, int flags) static void i915_pmu_event_stop(struct perf_event *event, int flags) { + if (flags & PERF_EF_UPDATE) + i915_pmu_event_read(event); i915_pmu_disable(event); } @@ -546,7 +553,7 @@ static int i915_pmu_event_add(struct perf_event *event, int flags) static void i915_pmu_event_del(struct perf_event *event, int flags) { - i915_pmu_disable(event); + i915_pmu_event_stop(event, PERF_EF_UPDATE); } static int i915_pmu_event_event_idx(struct perf_event *event) @@ -687,7 +694,7 @@ void i915_pmu_register(struct drm_i915_private *i915) return; i915->pmu.base.attr_groups = i915_pmu_attr_groups; - i915->pmu.base.task_ctx_nr = perf_sw_context; + i915->pmu.base.task_ctx_nr = perf_invalid_context; i915->pmu.base.event_init = i915_pmu_event_init; i915->pmu.base.add = i915_pmu_event_add; i915->pmu.base.del = i915_pmu_event_del; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h index fd5d838..0530e4e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -248,7 +248,7 @@ struct intel_engine_cs { struct { u32 enable; u64 sample[4]; - bool busy_stats; + unsigned int busy_stats; struct work_struct enable_busy_stats; struct delayed_work disable_busy_stats; } pmu; -- 1.8.3.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx