Re: [PATCH v2 9/9] drm/i915/perf: Add support for OA media units

Umesh Nerlige Ramappa <umesh.nerlige.ramappa@xxxxxxxxx> · Fri, 24 Feb 2023 16:58:39 -0800

On Thu, Feb 23, 2023 at 12:05:02PM -0800, Dixit, Ashutosh wrote:
On Thu, 16 Feb 2023 16:58:50 -0800, Umesh Nerlige Ramappa wrote:


Hi Umesh,

MTL introduces additional OA units dedicated to media use cases. Add
support for programming these OA units by passing the media engine class
and instance parameters.

UMD specific changes for GPUvis support:
https://patchwork.freedesktop.org/patch/522827/?series=114023
https://patchwork.freedesktop.org/patch/522822/?series=114023
https://patchwork.freedesktop.org/patch/522826/?series=114023
https://patchwork.freedesktop.org/patch/522828/?series=114023
https://patchwork.freedesktop.org/patch/522816/?series=114023
https://patchwork.freedesktop.org/patch/522825/?series=114023

General comment about the patch in case I miss something out, as I've
mentioned previously in general let's try to replace INTEL_METEORLAKE and
IS_METEORLAKE checks in the patch with:

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))

So that we don't have to enumerate each platform individually later.

Hmm, I recall that you had already commented about this at some point, 
sorry I missed that. Do you suggest I add this change in places outside 
this patch as well?


Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h          |   2 +
 drivers/gpu/drm/i915/i915_pci.c          |   1 +
 drivers/gpu/drm/i915/i915_perf.c         | 247 ++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_perf_oa_regs.h |  78 +++++++
 drivers/gpu/drm/i915/i915_perf_types.h   |  40 ++++
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 include/uapi/drm/i915_drm.h              |   4 +
 7 files changed, 347 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0393273faa09..f3cacbf41c86 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -856,6 +856,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
	(INTEL_INFO(dev_priv)->has_oa_bpc_reporting)
 #define HAS_OA_SLICE_CONTRIB_LIMITS(dev_priv) \
	(INTEL_INFO(dev_priv)->has_oa_slice_contrib_limits)
+#define HAS_OAM(dev_priv) \
+	(INTEL_INFO(dev_priv)->has_oam)

 /*
  * Set this flag, when platform requires 64K GTT page sizes or larger for
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index a8d942b16223..621730b6551c 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1028,6 +1028,7 @@ static const struct intel_device_info adl_p_info = {
	.has_mslice_steering = 1, \
	.has_oa_bpc_reporting = 1, \
	.has_oa_slice_contrib_limits = 1, \
+	.has_oam = 1, \
	.has_rc6 = 1, \
	.has_reset_engine = 1, \
	.has_rps = 1, \
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index f028df812067..a57690f4c531 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -192,6 +192,7 @@
  */

 #include <linux/anon_inodes.h>
+#include <linux/nospec.h>
 #include <linux/sizes.h>
 #include <linux/uuid.h>

@@ -326,6 +327,13 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
	[I915_OAR_FORMAT_A32u40_A4u32_B8_C8]    = { 5, 256 },
	[I915_OA_FORMAT_A24u40_A14u32_B8_C8]    = { 5, 256 },
+	[I915_OAM_FORMAT_MPEC8u64_B8_C8]	= { 1, 192, TYPE_OAM, HDR_64_BIT },
+	[I915_OAM_FORMAT_MPEC8u32_B8_C8]	= { 2, 128, TYPE_OAM, HDR_64_BIT },
+};
+
+/* PERF_GROUP_OAG is unused for oa_base, drop it for mtl */

What does this comment mean?

There are multiple OAM units and the base for each is used to calculate 
the OA regs mmio address. OAG is just one unit with the same addresses 
for the regs, so we don't use this array that initializes the bases for 
OA units. Maybe I will add this in the comment here.


+static const u32 mtl_oa_base[] = {
+	[PERF_GROUP_OAM_SAMEDIA_0] = 0x393000,
 };

 #define SAMPLE_OA_REPORT      (1<<0)
@@ -418,11 +426,17 @@ static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
	kfree(oa_bo);
 }

+static inline const
+struct i915_perf_regs *__oa_regs(struct i915_perf_stream *stream)
+{
+	return &stream->oa_buffer.group->regs;

Should just use stream->engine->oa_group->regs, see near the bottom.

+}
+
 static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
 {
	struct intel_uncore *uncore = stream->uncore;

-	return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
+	return intel_uncore_read(uncore, __oa_regs(stream)->oa_tail_ptr) &
	       GEN12_OAG_OATAILPTR_MASK;
 }

@@ -886,7 +900,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
		i915_reg_t oaheadptr;

		oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ?

= 12 ?

-			    GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;
+			    __oa_regs(stream)->oa_head_ptr :
+			    GEN8_OAHEADPTR;

		spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

@@ -939,7 +954,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
		return -EIO;

	oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ?

= 12 ?

-		       GEN12_OAG_OASTATUS : GEN8_OASTATUS;
+		       __oa_regs(stream)->oa_status :
+		       GEN8_OASTATUS;

	oastatus = intel_uncore_read(uncore, oastatus_reg);

@@ -1643,16 +1659,46 @@ free_noa_wait(struct i915_perf_stream *stream)
	i915_vma_unpin_and_release(&stream->noa_wait, 0);
 }

+/*
+ * intel_engine_lookup_user ensures that most of engine specific checks are
+ * taken care of, however, we can run into a case where the OA unit catering to
+ * the engine passed by the user is disabled for some reason. In such cases,
+ * ensure oa unit corresponding to an engine is functional. If there are no
+ * engines in the group, the unit is disabled.
+ */
+static bool oa_unit_functional(const struct intel_engine_cs *engine)
+{
+	return engine->oa_group && engine->oa_group->num_engines;
+}
+
 static bool engine_supports_oa(const struct intel_engine_cs *engine)
 {
	enum intel_platform platform = INTEL_INFO(engine->i915)->platform;

	switch (platform) {
+	case INTEL_METEORLAKE:
+		return engine->class == RENDER_CLASS ||
+		       ((engine->class == VIDEO_DECODE_CLASS ||
+			 engine->class == VIDEO_ENHANCEMENT_CLASS) &&
+			engine->gt->type == GT_MEDIA);
	default:
		return engine->class == RENDER_CLASS;
	}

As mentioned in a previous patch, this could just be:

	return engine->oa_group;

Because all these checks have already been done when the perf groups were
initialized so let's use that, as is done for oa_unit_functional.

Though, caution, to return engine->oa_group we'd have to remove
engine_supports_oa from __oa_engine_group, since engine->oa_group is not
yet assigned there. But I think the engine_supports_oa check in
__oa_engine_group is a duplication and should be removed.

 }

+static bool engine_class_supports_oa_format(struct intel_engine_cs *engine, int type)
+{
+	switch (engine->class) {
+	case RENDER_CLASS:
+		return type == TYPE_OAG;
+	case VIDEO_DECODE_CLASS:
+	case VIDEO_ENHANCEMENT_CLASS:
+		return type == TYPE_OAM;
+	default:
+		return false;
+	}
+}
+

Again, how about:

	return engine->oa_group && engine->oa_group->type == type;

Otherwise as mentioned below oa_group->type is unused and also incorrectly
assigned at present. The format type and group types are the same
(TYPE_OAG/TYPE_OAM). Can name the function engine_supports_oa_format.

 static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 {
	struct i915_perf *perf = stream->perf;
@@ -1680,7 +1726,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
		drm_WARN_ON(&gt->i915->drm,
			    intel_guc_slpc_unset_gucrc_mode(&gt->uc.guc.slpc));

-	intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
+	intel_uncore_forcewake_put(stream->uncore, g->fw_domains);
	intel_engine_pm_put(stream->engine);

	if (stream->ctx)
@@ -1804,8 +1850,8 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

-	intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0);
-	intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR,
+	intel_uncore_write(uncore, __oa_regs(stream)->oa_status, 0);
+	intel_uncore_write(uncore, __oa_regs(stream)->oa_head_ptr,
			   gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
	stream->oa_buffer.head = gtt_offset;

@@ -1817,9 +1863,9 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
	 *  to enable proper functionality of the overflow
	 *  bit."
	 */
-	intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
+	intel_uncore_write(uncore, __oa_regs(stream)->oa_buffer, gtt_offset |
			   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
-	intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
+	intel_uncore_write(uncore, __oa_regs(stream)->oa_tail_ptr,
			   gtt_offset & GEN12_OAG_OATAILPTR_MASK);

	/* Mark that we need updated tail pointers to read from... */
@@ -2579,7 +2625,8 @@ gen8_modify_self(struct intel_context *ce,
	return err;
 }

-static int gen8_configure_context(struct i915_gem_context *ctx,
+static int gen8_configure_context(struct i915_perf_stream *stream,
+				  struct i915_gem_context *ctx,
				  struct flex *flex, unsigned int count)
 {
	struct i915_gem_engines_iter it;
@@ -2589,7 +2636,8 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		GEM_BUG_ON(ce == ce->engine->kernel_context);

-		if (!engine_supports_oa(ce->engine))
+		if (!engine_supports_oa(ce->engine) ||
+		    ce->engine->class != stream->engine->class)
			continue;

		/* Otherwise OA settings will be set upon first use */
@@ -2720,7 +2768,7 @@ oa_configure_all_contexts(struct i915_perf_stream *stream,

		spin_unlock(&i915->gem.contexts.lock);

-		err = gen8_configure_context(ctx, regs, num_regs);
+		err = gen8_configure_context(stream, ctx, regs, num_regs);
		if (err) {
			i915_gem_context_put(ctx);
			return err;
@@ -2740,7 +2788,8 @@ oa_configure_all_contexts(struct i915_perf_stream *stream,
	for_each_uabi_engine(engine, i915) {
		struct intel_context *ce = engine->kernel_context;

-		if (!engine_supports_oa(ce->engine))
+		if (!engine_supports_oa(ce->engine) ||
+		    ce->engine->class != stream->engine->class)
			continue;

		regs[0].value = intel_sseu_make_rpcs(engine->gt, &ce->sseu);
@@ -2765,6 +2814,9 @@ gen12_configure_all_contexts(struct i915_perf_stream *stream,
		},
	};

+	if (stream->engine->class != RENDER_CLASS)
+		return 0;

OK, this is for render, nothing equivalent needed for media?

Media engines decided not to have anything configured in the CS 
contexts, rather everything is saved/restored in power context 
transitions, so nothing to be done here.


+
	return oa_configure_all_contexts(stream,
					 regs, ARRAY_SIZE(regs),
					 active);
@@ -2894,7 +2946,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
				   _MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING));
	}

-	intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
+	intel_uncore_write(uncore, __oa_regs(stream)->oa_debug,
			   /* Disable clk ratio reports, like previous Gens. */
			   _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
					      GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) |
@@ -2904,7 +2956,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
			    */
			   oag_report_ctx_switches(stream));

-	intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ?
+	intel_uncore_write(uncore, __oa_regs(stream)->oa_ctx_ctrl, periodic ?
			   (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME |
			    GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE |
			    (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
@@ -3058,8 +3110,8 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)

 static void gen12_oa_enable(struct i915_perf_stream *stream)
 {
-	struct intel_uncore *uncore = stream->uncore;
-	u32 report_format = stream->oa_buffer.format->format;
+	const struct i915_perf_regs *regs;
+	u32 val;

	/*
	 * If we don't want OA reports from the OA buffer, then we don't even
@@ -3070,9 +3122,11 @@ static void gen12_oa_enable(struct i915_perf_stream *stream)

	gen12_init_oa_buffer(stream);

-	intel_uncore_write(uncore, GEN12_OAG_OACONTROL,
-			   (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) |
-			   GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE);
+	regs = __oa_regs(stream);
+	val = (stream->oa_buffer.format->format << regs->oa_ctrl_counter_format_shift) |
+	      GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE;
+
+	intel_uncore_write(stream->uncore, regs->oa_ctrl, val);
 }

 /**
@@ -3124,9 +3178,9 @@ static void gen12_oa_disable(struct i915_perf_stream *stream)
 {
	struct intel_uncore *uncore = stream->uncore;

-	intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0);
+	intel_uncore_write(uncore, __oa_regs(stream)->oa_ctrl, 0);
	if (intel_wait_for_register(uncore,
-				    GEN12_OAG_OACONTROL,
+				    __oa_regs(stream)->oa_ctrl,
				    GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0,
				    50))
		drm_err(&stream->perf->i915->drm,
@@ -3329,6 +3383,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,

	stream->sample_size = sizeof(struct drm_i915_perf_record_header);

+	stream->oa_buffer.group = g;

Should just use stream->engine->oa_group, see near the bottom.

	stream->oa_buffer.format = &perf->oa_formats[props->oa_format];
	if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format->size == 0))
		return -EINVAL;
@@ -3379,7 +3434,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
	 *   references will effectively disable RC6.
	 */
	intel_engine_pm_get(stream->engine);
-	intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL);
+	intel_uncore_forcewake_get(stream->uncore, g->fw_domains);

	/*
	 * Wa_16011777198:dg2: GuC resets render as part of the Wa. This causes
@@ -3440,7 +3495,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
		intel_guc_slpc_unset_gucrc_mode(&gt->uc.guc.slpc);

 err_gucrc:
-	intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
+	intel_uncore_forcewake_put(stream->uncore, g->fw_domains);
	intel_engine_pm_put(stream->engine);

	free_oa_configs(stream);
@@ -4033,6 +4088,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
				    struct perf_open_properties *props)
 {
	struct drm_i915_gem_context_param_sseu user_sseu;
+	const struct i915_oa_format *f;
	u64 __user *uprop = uprops;
	bool config_sseu = false;
	u8 class, instance;
@@ -4203,6 +4259,17 @@ static int read_properties_unlocked(struct i915_perf *perf,
	if (!engine_supports_oa(props->engine))
		return -EINVAL;

+	if (!oa_unit_functional(props->engine))
+		return -ENODEV;
+
+	i = array_index_nospec(props->oa_format, I915_OA_FORMAT_MAX);

Why do we need this (something to do with speculation)? Can just do
'&perf->oa_formats[props->oa_format]' below? The format passed in has
already been checked in the switch statement above.

Traced it back to "smatch cleanups" commit in rebase history. Something 
to do with static analysis. If not a major concern, I would leave it as 
is.


+	f = &perf->oa_formats[i];
+	if (!engine_class_supports_oa_format(props->engine, f->type)) {
+		DRM_DEBUG("Invalid OA format %d for class %d\n",
+			  f->type, props->engine->class);
+		return -EINVAL;
+	}
+
	if (config_sseu) {
		ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
		if (ret) {
@@ -4383,6 +4450,14 @@ static const struct i915_range gen12_oa_b_counters[] = {
	{}
 };

+static const struct i915_range mtl_oam_b_counters[] = {
+	{ .start = 0x393000, .end = 0x39301c },	/* GEN12_OAM_STARTTRIG1[1-8] */
+	{ .start = 0x393020, .end = 0x39303c },	/* GEN12_OAM_REPORTTRIG1[1-8] */
+	{ .start = 0x393040, .end = 0x39307c },	/* GEN12_OAM_CEC[0-7][0-1] */
+	{ .start = 0x393200, .end = 0x39323C },	/* MPES[0-7] */
+	{}
+};
+
 static const struct i915_range xehp_oa_b_counters[] = {
	{ .start = 0xdc48, .end = 0xdc48 },	/* OAA_ENABLE_REG */
	{ .start = 0xdd00, .end = 0xdd48 },	/* OAG_LCE0_0 - OAA_LENABLE_REG */
@@ -4429,13 +4504,16 @@ static const struct i915_range gen12_oa_mux_regs[] = {

 /*
  * Ref: 14010536224:
- * 0x20cc is repurposed on MTL, so use a separate array for MTL.
+ * 0x20cc is repurposed on MTL, so use a separate array for MTL. Also add the
+ * MPES/MPEC registers.

MPES/MPEC registers are added above now, not here so maybe get rid of the
comment change above?

  */
 static const struct i915_range mtl_oa_mux_regs[] = {
	{ .start = 0x0d00, .end = 0x0d04 },	/* RPM_CONFIG[0-1] */
	{ .start = 0x0d0c, .end = 0x0d2c },	/* NOA_CONFIG[0-8] */
	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
+	{ .start = 0x38d100, .end = 0x38d114},	/* VISACTL */
+	{}
 };

 static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
@@ -4473,10 +4551,26 @@ static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
	return reg_in_range_table(addr, gen12_oa_b_counters);
 }

+static bool xehp_is_valid_oam_b_counter_addr(struct i915_perf *perf, u32 addr)
+{
+	enum intel_platform platform = INTEL_INFO(perf->i915)->platform;
+
+	if (!HAS_OAM(perf->i915))
+		return false;
+
+	switch (platform) {
+	case INTEL_METEORLAKE:
+		return reg_in_range_table(addr, mtl_oam_b_counters);
+	default:
+		return false;
+	}

Replace with 'if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))', registers
are identical in later platforms too.

Should the function prefix be xehp or mtl? Don't see xehp in bspec,
probably xehp is discontinued.

+}
+
 static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
 {
	return reg_in_range_table(addr, xehp_oa_b_counters) ||
-		reg_in_range_table(addr, gen12_oa_b_counters);
+		reg_in_range_table(addr, gen12_oa_b_counters) ||
+		xehp_is_valid_oam_b_counter_addr(perf, addr);
 }

 static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
@@ -4846,11 +4940,39 @@ static u32 __num_perf_groups_per_gt(struct intel_gt *gt)
	enum intel_platform platform = INTEL_INFO(gt->i915)->platform;

	switch (platform) {
+	case INTEL_METEORLAKE:
+		return 1;

I don't think we need this, as proposed previously maybe the function
should just unconditionally return 1.

	default:
		return 1;
	}
 }

+static u32 __oam_engine_group(struct intel_engine_cs *engine)
+{
+	enum intel_platform platform = INTEL_INFO(engine->i915)->platform;
+	struct intel_gt *gt = engine->gt;
+	u32 group = PERF_GROUP_INVALID;
+
+	switch (platform) {
+	case INTEL_METEORLAKE:

Replace here with 'if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))'.

+		/*
+		 * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices
+		 * within the gt use the same OAM. All MTL SKUs list 1 SA MEDIA.
+		 */
+		drm_WARN_ON(&engine->i915->drm,
+			    engine->gt->type != GT_MEDIA);
+
+		group = PERF_GROUP_OAM_SAMEDIA_0;
+		break;
+	default:
+		break;
+	}
+
+	drm_WARN_ON(&gt->i915->drm, group >= __num_perf_groups_per_gt(gt));
+
+	return group;
+}
+
 static u32 __oa_engine_group(struct intel_engine_cs *engine)
 {
	if (!engine_supports_oa(engine))

As mentioned above for engine_supports_oa, this looks like a duplication of
the checks below and should probably be removed.

I recall I added this because on older platforms, we were seeing a 
warnON splat from __oam_engine_group because those platforms had a media 
engine. Ideally, I would implement all your inputs in this patch and 
drop the warnON from __oam_engine_group. Is that okay?


@@ -4860,11 +4982,58 @@ static u32 __oa_engine_group(struct intel_engine_cs *engine)
	case RENDER_CLASS:
		return PERF_GROUP_OAG;

+	case VIDEO_DECODE_CLASS:
+	case VIDEO_ENHANCEMENT_CLASS:
+		return __oam_engine_group(engine);
+
	default:
		return PERF_GROUP_INVALID;
	}
 }

+static struct i915_perf_regs __oam_regs(u32 base)
+{
+	return (struct i915_perf_regs) {
+		base,
+		GEN12_OAM_HEAD_POINTER(base),
+		GEN12_OAM_TAIL_POINTER(base),
+		GEN12_OAM_BUFFER(base),
+		GEN12_OAM_CONTEXT_CONTROL(base),
+		GEN12_OAM_CONTROL(base),
+		GEN12_OAM_DEBUG(base),
+		GEN12_OAM_STATUS(base),
+		GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT,
+	};
+}
+
+static struct i915_perf_regs __oag_regs(void)
+{
+	return (struct i915_perf_regs) {
+		0,
+		GEN12_OAG_OAHEADPTR,
+		GEN12_OAG_OATAILPTR,
+		GEN12_OAG_OABUFFER,
+		GEN12_OAG_OAGLBCTXCTRL,
+		GEN12_OAG_OACONTROL,
+		GEN12_OAG_OA_DEBUG,
+		GEN12_OAG_OASTATUS,
+		GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT,
+	};
+}
+
+static void oa_init_regs(struct intel_gt *gt, u32 id)
+{
+	struct i915_perf_group *group = &gt->perf.group[id];
+	struct i915_perf_regs *regs = &group->regs;
+
+	if (id == PERF_GROUP_OAG && gt->type != GT_MEDIA)
+		*regs = __oag_regs();
+	else if (IS_METEORLAKE(gt->i915))

Replace with 'if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))', OAM
registers are identical in later platforms too. Maybe get rid of drm_WARN
below?

+		*regs = __oam_regs(mtl_oa_base[id]);
+	else
+		drm_WARN(&gt->i915->drm, 1, "Unsupported platform for OA\n");
+}
+
 static void oa_init_groups(struct intel_gt *gt)
 {
	int i, num_groups = gt->perf.num_perf_groups;
@@ -4881,6 +5050,24 @@ static void oa_init_groups(struct intel_gt *gt)
		g->oa_unit_id = perf->oa_unit_ids++;

		g->gt = gt;
+		oa_init_regs(gt, i);
+		g->fw_domains = FORCEWAKE_ALL;
+		if (i == PERF_GROUP_OAG) {
+			g->type = TYPE_OAG;
+
+			/*
+			 * Enabling all fw domains for OAG caps the max GT
+			 * frequency to media FF max. This could be less than
+			 * what the user sets through the sysfs and perf
+			 * measurements could be skewed. Since some platforms
+			 * have separate OAM units to measure media perf, do not
+			 * enable media fw domains for OAG.
+			 */
+			if (HAS_OAM(gt->i915))
+				g->fw_domains = FORCEWAKE_GT | FORCEWAKE_RENDER;

Is this needed even when media and render are separate tiles, which is the
only case we have in this code right now? For separate tiles setting
FORCEWAKE_ALL should not cap the freq, correct?

If not needed we can get rid of g->fw_domains.

+		} else {
+			g->type = TYPE_OAM;

This is wrong, because num_perf_groups is 1. So the type should be assigned
not based on i (which is always 0) but maybe similar to what is done in
oa_init_regs above.

That's a bug. Will fix. It was working as expected for some other 
platforms that had OAM, but with PERF_GROUP_OAM_SAMEDIA_0, it broke.

Thanks,
Umesh