On Tue, 2017-08-15 at 10:56 -0700, Dmitry Rogozhkin wrote:
> This patch should probably be squashed with Tvrtko's PMU enabling
> patch...
>
> i915 events don't have a CPU context to work with, so the i915
> PMU should work in global mode, i.e. expose perf_invalid_context.
> This will make the following call to perf:
>     perf stat workload.sh
> error out with "failed to read counter". Correct usage would be:
>     perf stat -a -C0 workload.sh
> And we will get the expected output:
>
> Another change required to make perf stat happy is to properly
> support pmu->del(): the comment on the del() declaration says that
> del() is required to call stop(event, PERF_EF_UPDATE), and perf stat
> implicitly gets the event count through this.
>
> Finally, if perf stat is run as follows:
>     perf stat -a workload.sh
> i.e. without the '-C0' option, then the event will be initialized as
> many times as there are CPUs. Thus, the patch adds PMU refcounting
> support to avoid multiple init/close of internal things. The issue
> which I did not overcome is that in this case counters will be
> multiplied by the number of CPUs. I am not sure whether it is
> possible to have some event-enabling CPU mask, or whether I need to
> simply error out.
>
> Change-Id: I7d1abe747a4399196e72253f7b66441a6528dbee
> Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@xxxxxxxxx>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_drv.h |   1 +
>  drivers/gpu/drm/i915/i915_pmu.c | 106 ++++++++++++++++++++++------------------
>  2 files changed, 60 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b59da2c..215d47b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2663,6 +2663,7 @@ struct drm_i915_private {
>  	struct {
>  		struct pmu base;
>  		spinlock_t lock;
> +		unsigned int ref;
>  		struct hrtimer timer;
>  		bool timer_enabled;
>  		u64 enable;
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index bcdf2bc..0972203 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -451,34 +451,39 @@ static void i915_pmu_enable(struct perf_event *event)
>  		container_of(event->pmu, typeof(*i915), pmu.base);
>  	struct intel_engine_cs *engine = NULL;
>  	unsigned long flags;
> +	bool start_timer = false;
>
>  	spin_lock_irqsave(&i915->pmu.lock, flags);
>
> -	if (is_engine_event(event)) {
> -		engine = intel_engine_lookup_user(i915,
> -						  engine_event_class(event),
> -						  engine_event_instance(event));
> -		GEM_BUG_ON(!engine);
> -		engine->pmu.enable |= BIT(engine_event_sample(event));
> -	}
> -
> -	i915->pmu.enable |= event_enabled_mask(event);
> +	if (i915->pmu.ref++ == 0) {

This was stupid of me: with this I support only a single busy-stats
event. The correct way to do it is to make busy_stats a refcount, which
I did in the updated patch I just sent. I wonder whether the timer
stuff should be updated in a similar way, but I have not yet tried that
part of the code, so I cannot judge.
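Roughly what I mean, as an untested sketch (the helper names below are
illustrative only, not taken from the updated patch):

/*
 * Sketch: engine->pmu.busy_stats as an unsigned int refcount instead
 * of a bool, so several concurrent events can request busy stats.
 */
static void busy_stats_get(struct intel_engine_cs *engine)
{
        /* First user enables the stats, cancelling a pending disable. */
        if (engine->pmu.busy_stats++ == 0)
                if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
                        queue_work(engine->i915->wq,
                                   &engine->pmu.enable_busy_stats);
}

static void busy_stats_put(struct intel_engine_cs *engine)
{
        /* Last user schedules the delayed disable, as the patch does. */
        if (engine->pmu.busy_stats && --engine->pmu.busy_stats == 0)
                queue_delayed_work(engine->i915->wq,
                                   &engine->pmu.disable_busy_stats,
                                   round_jiffies_up_relative(2 * HZ));
}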
> +		if (is_engine_event(event)) {
> +			engine = intel_engine_lookup_user(i915,
> +							  engine_event_class(event),
> +							  engine_event_instance(event));
> +			GEM_BUG_ON(!engine);
> +			engine->pmu.enable |= BIT(engine_event_sample(event));
> +		}
>
> -	if (pmu_needs_timer(i915, true) && !i915->pmu.timer_enabled) {
> -		hrtimer_start_range_ns(&i915->pmu.timer,
> -				       ns_to_ktime(PERIOD), 0,
> -				       HRTIMER_MODE_REL_PINNED);
> -		i915->pmu.timer_enabled = true;
> -	} else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
> -		   !engine->pmu.busy_stats) {
> -		engine->pmu.busy_stats = true;
> -		if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
> -			queue_work(i915->wq, &engine->pmu.enable_busy_stats);
> +		i915->pmu.enable |= event_enabled_mask(event);
> +
> +		if (pmu_needs_timer(i915, true) && !i915->pmu.timer_enabled) {
> +			hrtimer_start_range_ns(&i915->pmu.timer,
> +					       ns_to_ktime(PERIOD), 0,
> +					       HRTIMER_MODE_REL_PINNED);
> +			i915->pmu.timer_enabled = true;
> +		} else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
> +			   !engine->pmu.busy_stats) {
> +			engine->pmu.busy_stats = true;
> +			if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
> +				queue_work(i915->wq, &engine->pmu.enable_busy_stats);
> +		}
> +		start_timer = true;
>  	}
>
>  	spin_unlock_irqrestore(&i915->pmu.lock, flags);
>
> -	i915_pmu_timer_start(event);
> +	if (start_timer)
> +		i915_pmu_timer_start(event);
>  }
>
>  static void i915_pmu_disable(struct perf_event *event)
> @@ -486,40 +491,45 @@ static void i915_pmu_disable(struct perf_event *event)
>  	struct drm_i915_private *i915 =
>  		container_of(event->pmu, typeof(*i915), pmu.base);
>  	unsigned long flags;
> +	bool timer_cancel = true;
>  	u64 mask;
>
>  	spin_lock_irqsave(&i915->pmu.lock, flags);
>
> -	if (is_engine_event(event)) {
> -		struct intel_engine_cs *engine;
> -		enum intel_engine_id id;
> -
> -		engine = intel_engine_lookup_user(i915,
> -						  engine_event_class(event),
> -						  engine_event_instance(event));
> -		GEM_BUG_ON(!engine);
> -		engine->pmu.enable &= ~BIT(engine_event_sample(event));
> -		if (engine->pmu.busy_stats &&
> -		    !engine_needs_busy_stats(engine)) {
> -			engine->pmu.busy_stats = false;
> -			queue_delayed_work(i915->wq,
> -					   &engine->pmu.disable_busy_stats,
> -					   round_jiffies_up_relative(2 * HZ));
> +	if (i915->pmu.ref && --i915->pmu.ref == 0) {
> +		if (is_engine_event(event)) {
> +			struct intel_engine_cs *engine;
> +			enum intel_engine_id id;
> +
> +			engine = intel_engine_lookup_user(i915,
> +							  engine_event_class(event),
> +							  engine_event_instance(event));
> +			GEM_BUG_ON(!engine);
> +			engine->pmu.enable &= ~BIT(engine_event_sample(event));
> +			if (engine->pmu.busy_stats &&
> +			    !engine_needs_busy_stats(engine)) {
> +				engine->pmu.busy_stats = false;
> +				queue_delayed_work(i915->wq,
> +						   &engine->pmu.disable_busy_stats,
> +						   round_jiffies_up_relative(2 * HZ));
> +			}
> +			mask = 0;
> +			for_each_engine(engine, i915, id)
> +				mask |= engine->pmu.enable;
> +			mask = (~mask) & ENGINE_SAMPLE_MASK;
> +		} else {
> +			mask = event_enabled_mask(event);
>  		}
> -		mask = 0;
> -		for_each_engine(engine, i915, id)
> -			mask |= engine->pmu.enable;
> -		mask = (~mask) & ENGINE_SAMPLE_MASK;
> -	} else {
> -		mask = event_enabled_mask(event);
> -	}
>
> -	i915->pmu.enable &= ~mask;
> -	i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
> +		i915->pmu.enable &= ~mask;
> +		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
> +		timer_cancel = true;
> +	}
>
>  	spin_unlock_irqrestore(&i915->pmu.lock, flags);
>
> -	i915_pmu_timer_cancel(event);
> +	if (timer_cancel)
> +		i915_pmu_timer_cancel(event);
>  }
>
>  static void i915_pmu_event_start(struct perf_event *event, int flags)
> @@ -529,6 +539,8 @@ static void i915_pmu_event_start(struct perf_event *event, int flags)
>
>  static void i915_pmu_event_stop(struct perf_event *event, int flags)
>  {
> +	if (flags & PERF_EF_UPDATE)
> +		i915_pmu_event_read(event);
>  	i915_pmu_disable(event);
>  }
>
> @@ -546,7 +558,7 @@ static int i915_pmu_event_add(struct perf_event *event, int flags)
>
>  static void i915_pmu_event_del(struct perf_event *event, int flags)
>  {
> -	i915_pmu_disable(event);
> +	i915_pmu_event_stop(event, PERF_EF_UPDATE);
>  }
>
>  static int i915_pmu_event_event_idx(struct perf_event *event)
> @@ -687,7 +699,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
>  		return;
>
>  	i915->pmu.base.attr_groups = i915_pmu_attr_groups;
> -	i915->pmu.base.task_ctx_nr = perf_sw_context;
> +	i915->pmu.base.task_ctx_nr = perf_invalid_context;
>  	i915->pmu.base.event_init = i915_pmu_event_init;
>  	i915->pmu.base.add = i915_pmu_event_add;
>  	i915->pmu.base.del = i915_pmu_event_del;
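For reference, my understanding of the global-mode part above, as an
annotated sketch (the 'ret' variable is illustrative only):

        /*
         * A PMU with task_ctx_nr == perf_invalid_context is a
         * system-wide ("uncore"-style) PMU: perf core rejects per-task
         * events on it, which is why plain "perf stat workload.sh"
         * fails while "perf stat -a -C0 workload.sh" works.
         */
        i915->pmu.base.task_ctx_nr = perf_invalid_context;
        ret = perf_pmu_register(&i915->pmu.base, "i915", -1); /* -1: dynamic type id */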