On Thu, 2022-06-30 at 11:29 +0100, Tvrtko Ursulin wrote: > On 29/06/2022 19:46, Stuart Summers wrote: > > In the driver teardown, we are unregistering the gt prior > > to unregistering the PMU. This means there is a small window > > of time in which the application can request metrics from the > > PMU, some of which are calling into the uapi engines list, > > while the engines are not available. In this case we can > > see null pointer dereferences. > > > > Prevent this by simply checking if the engines are present > > when those PMU events come through. Print a debug message > > indicating when they aren't available. > > Obvious question - can we just move PMU unregister PMU to before > unregister GT? Well I wanted to push the workaround asap to get feedback. I submitted to trybot in parallel and it looks valid. I agree that's a more valid solution, but IMO we should have these checks anyway. At any rate, I'll resubmit with the move of the registrations. Thanks, Stuart > > Regards, > > Tvrtko > > > Signed-off-by: Stuart Summers <stuart.summers@xxxxxxxxx> > > --- > > drivers/gpu/drm/i915/i915_pmu.c | 72 +++++++++++++++++++--------- > > ----- > > 1 file changed, 42 insertions(+), 30 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_pmu.c > > b/drivers/gpu/drm/i915/i915_pmu.c > > index 958b37123bf1..796a1d8e36f2 100644 > > --- a/drivers/gpu/drm/i915/i915_pmu.c > > +++ b/drivers/gpu/drm/i915/i915_pmu.c > > @@ -670,21 +670,28 @@ static void i915_pmu_enable(struct perf_event > > *event) > > if (is_engine_event(event)) { > > u8 sample = engine_event_sample(event); > > struct intel_engine_cs *engine; > > - > > - engine = intel_engine_lookup_user(i915, > > - engine_event_class(ev > > ent), > > - engine_event_instance > > (event)); > > - > > - BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) != > > - I915_ENGINE_SAMPLE_COUNT); > > - BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) != > > - I915_ENGINE_SAMPLE_COUNT); > > - GEM_BUG_ON(sample >= ARRAY_SIZE(engine- > > >pmu.enable_count)); > > - GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample)); > > - GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0); > > - > > - engine->pmu.enable |= BIT(sample); > > - engine->pmu.enable_count[sample]++; > > + u8 class = engine_event_class(event); > > + u8 instance = engine_event_instance(event); > > + > > + engine = intel_engine_lookup_user(i915, class, > > instance); > > + if (engine) { > > + BUILD_BUG_ON(ARRAY_SIZE(engine- > > >pmu.enable_count) != > > + I915_ENGINE_SAMPLE_COUNT); > > + BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) != > > + I915_ENGINE_SAMPLE_COUNT); > > + GEM_BUG_ON(sample >= > > + ARRAY_SIZE(engine- > > >pmu.enable_count)); > > + GEM_BUG_ON(sample >= > > + ARRAY_SIZE(engine->pmu.sample)); > > + GEM_BUG_ON(engine->pmu.enable_count[sample] == > > ~0); > > + > > + engine->pmu.enable |= BIT(sample); > > + engine->pmu.enable_count[sample]++; > > + } else { > > + drm_dbg(&i915->drm, > > + "Invalid engine event: { class:%d, > > inst:%d }\n", > > + class, instance); > > + } > > } > > > > spin_unlock_irqrestore(&pmu->lock, flags); > > @@ -714,21 +721,26 @@ static void i915_pmu_disable(struct > > perf_event *event) > > if (is_engine_event(event)) { > > u8 sample = engine_event_sample(event); > > struct intel_engine_cs *engine; > > - > > - engine = intel_engine_lookup_user(i915, > > - engine_event_class(ev > > ent), > > - engine_event_instance > > (event)); > > - > > - GEM_BUG_ON(sample >= ARRAY_SIZE(engine- > > >pmu.enable_count)); > > - GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample)); > > - GEM_BUG_ON(engine->pmu.enable_count[sample] == 0); > > - > > - /* > > - * Decrement the reference count and clear the enabled > > - * bitmask when the last listener on an event goes > > away. > > - */ > > - if (--engine->pmu.enable_count[sample] == 0) > > - engine->pmu.enable &= ~BIT(sample); > > + u8 class = engine_event_class(event); > > + u8 instance = engine_event_instance(event); > > + > > + engine = intel_engine_lookup_user(i915, class, > > instance); > > + if (engine) { > > + GEM_BUG_ON(sample >= ARRAY_SIZE(engine- > > >pmu.enable_count)); > > + GEM_BUG_ON(sample >= ARRAY_SIZE(engine- > > >pmu.sample)); > > + GEM_BUG_ON(engine->pmu.enable_count[sample] == > > 0); > > + > > + /* > > + * Decrement the reference count and clear the > > enabled > > + * bitmask when the last listener on an event > > goes away. > > + */ > > + if (--engine->pmu.enable_count[sample] == 0) > > + engine->pmu.enable &= ~BIT(sample); > > + } else { > > + drm_dbg(&i915->drm, > > + "Invalid engine event: { class:%d, > > inst:%d }\n", > > + class, instance); > > + } > > } > > > > GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));