This patch should probably be squashed with Tvrtko's PMU enabling
patch...

i915 events don't have a CPU context to work with, so the i915 PMU
should work in global mode, i.e. expose perf_invalid_context. This
makes the following perf invocation:

  perf stat workload.sh

error out with "failed to read counter". Correct usage would be:

  perf stat -a -C0 workload.sh

which gives the expected output.

Another change required to make perf stat happy is to properly support
pmu->del(): the comment on the del() declaration says that del() is
required to call stop(event, PERF_EF_UPDATE), and perf stat implicitly
gets the event count through this.

Finally, if perf stat is run as follows:

  perf stat -a workload.sh

i.e. without the '-C0' option, then the event will be initialized as
many times as there are CPUs. Thus, this patch adds PMU refcounting
support to avoid multiple init/close of internal state. The issue I did
not overcome is that in this case counter values are multiplied by the
number of CPUs. I am not sure whether it is possible to expose some
event-enabling CPU mask, or whether I should simply error out (but see
the cpumask sketch in the notes below).

Change-Id: I7d1abe747a4399196e72253f7b66441a6528dbee
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@xxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
---
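For reference, a userspace view of the perf_invalid_context contract:
such events have no task context and must be opened system-wide on an
explicit CPU (pid == -1, cpu >= 0), which is what 'perf stat -a -C0'
does internally. Below is a minimal, hypothetical sketch of driving a
counter directly via perf_event_open(2); reading the dynamic PMU type
id from sysfs is the standard mechanism, but the attr.config value is
only a placeholder for whatever counter encoding the final series
settles on.

/* Sketch: open one i915 PMU event system-wide on CPU0 and read it.
 * attr.config is a placeholder; real values are listed under
 * /sys/bus/event_source/devices/i915/events/ once the PMU is
 * registered.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int type, fd;
	FILE *f;

	/* A dynamic PMU gets its event type id assigned at registration. */
	f = fopen("/sys/bus/event_source/devices/i915/type", "r");
	if (!f || fscanf(f, "%d", &type) != 1) {
		fprintf(stderr, "no i915 PMU registered?\n");
		return EXIT_FAILURE;
	}
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.type = type;
	attr.size = sizeof(attr);
	attr.config = 0; /* placeholder counter config */

	/*
	 * perf_invalid_context means no task context: open the event as
	 * pid == -1 on an explicit CPU, the programmatic equivalent of
	 * 'perf stat -a -C0'.
	 */
	fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
		     -1 /* group_fd */, 0 /* flags */);
	if (fd < 0) {
		perror("perf_event_open");
		return EXIT_FAILURE;
	}

	sleep(1); /* let the counter accumulate while the GPU works */

	if (read(fd, &count, sizeof(count)) != sizeof(count)) {
		perror("read");
		return EXIT_FAILURE;
	}
	printf("count: %llu\n", (unsigned long long)count);

	close(fd);
	return EXIT_SUCCESS;
}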
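On the open question above about an event-enabling CPU mask: other
system-wide (uncore) PMUs solve exactly this by exposing a read-only
'cpumask' attribute in sysfs; the perf tool reads it and restricts
'-a' sessions to the advertised CPU, so counts are not multiplied by
the number of CPUs. A sketch of that convention follows (names are
hypothetical; this is not part of the patch):

/*
 * Hypothetical fragment for i915_pmu.c (not in this patch): advertise
 * CPU0 via a 'cpumask' sysfs attribute, following the convention of
 * other uncore PMUs. The perf tool reads this file and opens the
 * event on the listed CPU only.
 */
static cpumask_t i915_pmu_cpumask = CPU_MASK_CPU0;

static ssize_t i915_pmu_cpumask_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_cpumask_show, NULL);

static struct attribute *i915_pmu_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_pmu_cpumask_attrs,
};

/*
 * i915_pmu_cpumask_attr_group would then be added to
 * i915_pmu_attr_groups[] before perf_pmu_register().
 */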
 drivers/gpu/drm/i915/i915_drv.h |   1 +
 drivers/gpu/drm/i915/i915_pmu.c | 106 ++++++++++++++++++++++------------------
 2 files changed, 60 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b59da2c..215d47b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2663,6 +2663,7 @@ struct drm_i915_private {
 	struct {
 		struct pmu base;
 		spinlock_t lock;
+		unsigned int ref;
 		struct hrtimer timer;
 		bool timer_enabled;
 		u64 enable;
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index bcdf2bc..0972203 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -451,34 +451,39 @@ static void i915_pmu_enable(struct perf_event *event)
 		container_of(event->pmu, typeof(*i915), pmu.base);
 	struct intel_engine_cs *engine = NULL;
 	unsigned long flags;
+	bool start_timer = false;

 	spin_lock_irqsave(&i915->pmu.lock, flags);

-	if (is_engine_event(event)) {
-		engine = intel_engine_lookup_user(i915,
-						  engine_event_class(event),
-						  engine_event_instance(event));
-		GEM_BUG_ON(!engine);
-		engine->pmu.enable |= BIT(engine_event_sample(event));
-	}
-
-	i915->pmu.enable |= event_enabled_mask(event);
+	if (i915->pmu.ref++ == 0) {
+		if (is_engine_event(event)) {
+			engine = intel_engine_lookup_user(i915,
+							  engine_event_class(event),
+							  engine_event_instance(event));
+			GEM_BUG_ON(!engine);
+			engine->pmu.enable |= BIT(engine_event_sample(event));
+		}

-	if (pmu_needs_timer(i915, true) && !i915->pmu.timer_enabled) {
-		hrtimer_start_range_ns(&i915->pmu.timer,
-				       ns_to_ktime(PERIOD), 0,
-				       HRTIMER_MODE_REL_PINNED);
-		i915->pmu.timer_enabled = true;
-	} else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
-		   !engine->pmu.busy_stats) {
-		engine->pmu.busy_stats = true;
-		if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
-			queue_work(i915->wq, &engine->pmu.enable_busy_stats);
+		i915->pmu.enable |= event_enabled_mask(event);
+
+		if (pmu_needs_timer(i915, true) && !i915->pmu.timer_enabled) {
+			hrtimer_start_range_ns(&i915->pmu.timer,
+					       ns_to_ktime(PERIOD), 0,
+					       HRTIMER_MODE_REL_PINNED);
+			i915->pmu.timer_enabled = true;
+		} else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
+			   !engine->pmu.busy_stats) {
+			engine->pmu.busy_stats = true;
+			if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+				queue_work(i915->wq, &engine->pmu.enable_busy_stats);
+		}
+		start_timer = true;
 	}

 	spin_unlock_irqrestore(&i915->pmu.lock, flags);

-	i915_pmu_timer_start(event);
+	if (start_timer)
+		i915_pmu_timer_start(event);
 }

 static void i915_pmu_disable(struct perf_event *event)
@@ -486,40 +491,45 @@ static void i915_pmu_disable(struct perf_event *event)
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
 	unsigned long flags;
+	bool timer_cancel = false;
 	u64 mask;

 	spin_lock_irqsave(&i915->pmu.lock, flags);

-	if (is_engine_event(event)) {
-		struct intel_engine_cs *engine;
-		enum intel_engine_id id;
-
-		engine = intel_engine_lookup_user(i915,
-						  engine_event_class(event),
-						  engine_event_instance(event));
-		GEM_BUG_ON(!engine);
-		engine->pmu.enable &= ~BIT(engine_event_sample(event));
-		if (engine->pmu.busy_stats &&
-		    !engine_needs_busy_stats(engine)) {
-			engine->pmu.busy_stats = false;
-			queue_delayed_work(i915->wq,
-					   &engine->pmu.disable_busy_stats,
-					   round_jiffies_up_relative(2 * HZ));
+	if (i915->pmu.ref && --i915->pmu.ref == 0) {
+		if (is_engine_event(event)) {
+			struct intel_engine_cs *engine;
+			enum intel_engine_id id;
+
+			engine = intel_engine_lookup_user(i915,
+							  engine_event_class(event),
+							  engine_event_instance(event));
+			GEM_BUG_ON(!engine);
+			engine->pmu.enable &= ~BIT(engine_event_sample(event));
+			if (engine->pmu.busy_stats &&
+			    !engine_needs_busy_stats(engine)) {
+				engine->pmu.busy_stats = false;
+				queue_delayed_work(i915->wq,
+						   &engine->pmu.disable_busy_stats,
+						   round_jiffies_up_relative(2 * HZ));
+			}
+			mask = 0;
+			for_each_engine(engine, i915, id)
+				mask |= engine->pmu.enable;
+			mask = (~mask) & ENGINE_SAMPLE_MASK;
+		} else {
+			mask = event_enabled_mask(event);
 		}
-		mask = 0;
-		for_each_engine(engine, i915, id)
-			mask |= engine->pmu.enable;
-		mask = (~mask) & ENGINE_SAMPLE_MASK;
-	} else {
-		mask = event_enabled_mask(event);
-	}

-	i915->pmu.enable &= ~mask;
-	i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
+		i915->pmu.enable &= ~mask;
+		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
+		timer_cancel = true;
+	}

 	spin_unlock_irqrestore(&i915->pmu.lock, flags);

-	i915_pmu_timer_cancel(event);
+	if (timer_cancel)
+		i915_pmu_timer_cancel(event);
 }

 static void i915_pmu_event_start(struct perf_event *event, int flags)
@@ -529,6 +539,8 @@ static void i915_pmu_event_start(struct perf_event *event, int flags)

 static void i915_pmu_event_stop(struct perf_event *event, int flags)
 {
+	if (flags & PERF_EF_UPDATE)
+		i915_pmu_event_read(event);
 	i915_pmu_disable(event);
 }

@@ -546,7 +558,7 @@ static int i915_pmu_event_add(struct perf_event *event, int flags)

 static void i915_pmu_event_del(struct perf_event *event, int flags)
 {
-	i915_pmu_disable(event);
+	i915_pmu_event_stop(event, PERF_EF_UPDATE);
 }

 static int i915_pmu_event_event_idx(struct perf_event *event)
@@ -687,7 +699,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
 		return;

 	i915->pmu.base.attr_groups = i915_pmu_attr_groups;
-	i915->pmu.base.task_ctx_nr = perf_sw_context;
+	i915->pmu.base.task_ctx_nr = perf_invalid_context;
 	i915->pmu.base.event_init = i915_pmu_event_init;
 	i915->pmu.base.add = i915_pmu_event_add;
 	i915->pmu.base.del = i915_pmu_event_del;
-- 
1.8.3.1