On Wed, 09 Oct 2024, Ville Syrjala <ville.syrjala@xxxxxxxxxxxxxxx> wrote: > From: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > > Implement pmu support for gen2 so that one can use intel_gpu_top > on it once again. > > Gen2 lacks MI_MODE/MODE_IDLE so we'll have to do a bit more work > to determine the state of the engine: > - to determine if the ring contains unconsumed data we can simply > compare RING_TAIL vs. RING_HEAD > - also check RING_HEAD vs. ACTHD to catch cases where the hardware > is still executing a batch buffer but the ring head has already > caught up with the tail. Not entirely sure if that's actually > possible or not, but maybe it can happen if the batch buffer is > initiated from the very end of the ring? But even if not strictly > necessary there's no real harm in checking anyway. > - MI_WAIT_FOR_EVENT can be detected via a dedicated bit in RING_HEAD > > Signed-off-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/gt/intel_engine_regs.h | 2 +- > drivers/gpu/drm/i915/i915_pmu.c | 32 +++++++++++++++++---- > 2 files changed, 27 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h > index a8eac59e3779..1c4784cb296c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h > +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h > @@ -15,6 +15,7 @@ > #define HEAD_WRAP_COUNT 0xFFE00000 > #define HEAD_WRAP_ONE 0x00200000 > #define HEAD_ADDR 0x001FFFFC > +#define HEAD_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */ > #define RING_START(base) _MMIO((base) + 0x38) > #define RING_CTL(base) _MMIO((base) + 0x3c) > #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ > @@ -26,7 +27,6 @@ > #define RING_VALID_MASK 0x00000001 > #define RING_VALID 0x00000001 > #define RING_INVALID 0x00000000 > -#define RING_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */ > #define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */ > #define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */ > #define RING_SYNC_0(base) _MMIO((base) + 0x40) > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c > index 67b6cbdeff1d..2eed4c866321 100644 > --- a/drivers/gpu/drm/i915/i915_pmu.c > +++ b/drivers/gpu/drm/i915/i915_pmu.c > @@ -356,7 +356,7 @@ static bool exclusive_mmio_access(const struct drm_i915_private *i915) > return GRAPHICS_VER(i915) == 7; > } > > -static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns) > +static void engine_sample_gen3(struct intel_engine_cs *engine, unsigned int period_ns) A bit surprising to see a gen3 suffix rather than a prefix. Anyway, not going to spend time on looking into gen2 details, but it's fine, Acked-by: Jani Nikula <jani.nikula@xxxxxxxxx> > { > struct intel_engine_pmu *pmu = &engine->pmu; > bool busy; > @@ -391,6 +391,31 @@ static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns > add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); > } > > +static void engine_sample_gen2(struct intel_engine_cs *engine, unsigned int period_ns) > +{ > + struct intel_engine_pmu *pmu = &engine->pmu; > + u32 tail, head, acthd; > + > + tail = ENGINE_READ_FW(engine, RING_TAIL); > + head = ENGINE_READ_FW(engine, RING_HEAD); > + acthd = ENGINE_READ_FW(engine, ACTHD); > + > + if (head & HEAD_WAIT_I8XX) > + add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); > + > + if (head & HEAD_WAIT_I8XX || head != acthd || > + (head & HEAD_ADDR) != (tail & TAIL_ADDR)) > + add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); > +} > + > +static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns) > +{ > + if (GRAPHICS_VER(engine->i915) >= 3) > + engine_sample_gen3(engine, period_ns); > + else > + engine_sample_gen2(engine, period_ns); > +} > + > static void > engines_sample(struct intel_gt *gt, unsigned int period_ns) > { > @@ -1243,11 +1268,6 @@ void i915_pmu_register(struct drm_i915_private *i915) > > int ret = -ENOMEM; > > - if (GRAPHICS_VER(i915) <= 2) { > - drm_info(&i915->drm, "PMU not supported for this GPU."); > - return; > - } > - > spin_lock_init(&pmu->lock); > hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); > pmu->timer.function = i915_sample; -- Jani Nikula, Intel