Re: [PATCH v2 04/15] drm/i915/perf: Determine gen12 oa ctx offset at runtime

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Sep 27, 2022 at 04:24:01PM -0700, Dixit, Ashutosh wrote:
On Fri, 23 Sep 2022 13:11:43 -0700, Umesh Nerlige Ramappa wrote:


Hi Umesh,

Some SKUs of same gen12 platform may have different oactxctrl
offsets. For gen12, determine oactxctrl offsets at runtime.

So seems we are writing code to extract static information for products
just because it is not documented?

Documented but never accurate. The only accurate values are obtained with real hardware. In addition DG2 has different offsets for different variants, so this patch.



v2: (Lionel)
- Move MI definitions to intel_gpu_commands.h
- Ensure __find_reg_in_lri does read past context image size

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@xxxxxxxxx>
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   4 +
 drivers/gpu/drm/i915/i915_perf.c             | 146 +++++++++++++++----
 drivers/gpu/drm/i915/i915_perf_oa_regs.h     |   2 +-
 3 files changed, 121 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index d4e9702d3c8e..f50ea92910d9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -187,6 +187,10 @@
 #define   MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
 #define   MI_BATCH_PREDICATE         REG_BIT(15) /* HSW+ on RCS only*/

+#define MI_OPCODE(x)		(((x) >> 23) & 0x3f)
+#define IS_MI_LRI_CMD(x)	(MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0)))
+#define MI_LRI_LEN(x)		(((x) & 0xff) + 1)
+
 /*
  * 3D instructions used by the kernel
  */
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index cd57b5836386..fb705f24705b 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1358,6 +1358,64 @@ static int gen12_get_render_context_id(struct i915_perf_stream *stream)
	return 0;
 }

+#define __valid_oactxctrl_offset(x) ((x) && (x) != U32_MAX)
+static bool __find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
+{
+	u32 idx = *offset;
+	u32 len = min(MI_LRI_LEN(state[idx]) + idx, end);

The code below works only if MI_LRI_LEN() is an even number (because of
'idx += 2', which I think should always be the case but not sure if we it
makes sense to add an assert etc.

+
+	idx++;
+	for (; idx < len; idx += 2)
+		if (state[idx] == reg)
+			break;
+
+	*offset = idx;
+	return state[idx] == reg;

I think this can be a bug if 'idx > len' but 'state[idx] == reg'. So we
need to do something like:

static bool __find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
{
	u32 idx = *offset;
	u32 len = min(MI_LRI_LEN(state[idx]) + idx, end);
	bool found = false;

	idx++;
	for (; idx < len; idx += 2)
		if (state[idx] == reg)
			found = true;

	*offset = idx;
	return found;


Agree, will add found.

+}
+
+static u32 __context_image_offset(struct intel_context *ce, u32 reg)
+{
+	u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4;
+	u32 *state = ce->lrc_reg_state;
+
+	for (offset = 0; offset < len; ) {
+		if (IS_MI_LRI_CMD(state[offset])) {
+			if (__find_reg_in_lri(state, reg, &offset, len))
+				break;
+		} else {
+			offset++;
+		}
+	}
+
+	return offset < len ? offset : U32_MAX;
+}
+
+static int __set_oa_ctx_ctrl_offset(struct intel_context *ce)

I have seen people complain about unnecessary double underscores in front
of function names ;-)

will remove/change to oa_*.


+{
+	i915_reg_t reg = GEN12_OACTXCONTROL(ce->engine->mmio_base);
+	struct i915_perf *perf = &ce->engine->i915->perf;
+	u32 saved_offset = perf->ctx_oactxctrl_offset;
+	u32 offset;
+
+	/* Do this only once. Failure is stored as offset of U32_MAX */
+	if (saved_offset)
+		return 0;

But if saved_offset is U32_MAX we should be returning -ENODEV?

correct, the above if block should be:

if (__valid_oactxctrl_offset(offset))
	return 0;

if (saved_offset == U32_MAX)
	return -ENODEV;


+
+	offset = __context_image_offset(ce, i915_mmio_reg_offset(reg));
+	perf->ctx_oactxctrl_offset = offset;
+
+	drm_dbg(&ce->engine->i915->drm,
+		"%s oa ctx control at 0x%08x dword offset\n",
+		ce->engine->name, offset);
+
+	return __valid_oactxctrl_offset(offset) ? 0 : -ENODEV;
+}
+
+static bool engine_supports_mi_query(struct intel_engine_cs *engine)
+{
+	return engine->class == RENDER_CLASS;
+}
+
 /**
  * oa_get_render_ctx_id - determine and hold ctx hw id
  * @stream: An i915-perf stream opened for OA metrics
@@ -1377,6 +1435,17 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
	if (IS_ERR(ce))
		return PTR_ERR(ce);

+	if (engine_supports_mi_query(stream->engine)) {
+		ret = __set_oa_ctx_ctrl_offset(ce);
+		if (ret && !(stream->sample_flags & SAMPLE_OA_REPORT)) {

This is not a problem in SAMPLE_OA_REPORT case?

SAMPLE_OA_REPORT is OAG use case.

Actually, I did not know how to treat this condition. The current interface will configure both OAR and OAG. If we have an error configuring OAR, should we fail or let the OAG use case work?

I am now leaning towards failing this unconditionally. Thoughts?


+			intel_context_unpin(ce);
+			drm_err(&stream->perf->i915->drm,
+				"Enabling perf query failed for %s\n",
+				stream->engine->name);
+			return ret;
+		}
+	}
+
	switch (GRAPHICS_VER(ce->engine->i915)) {
	case 7: {
		/*
@@ -2408,10 +2477,11 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
	int err;
	struct intel_context *ce = stream->pinned_ctx;
	u32 format = stream->oa_buffer.format;
+	u32 offset = stream->perf->ctx_oactxctrl_offset;
	struct flex regs_context[] = {
		{
			GEN8_OACTXCONTROL,
-			stream->perf->ctx_oactxctrl_offset + 1,
+			offset + 1,
			active ? GEN8_OA_COUNTER_RESUME : 0,
		},
	};
@@ -2436,15 +2506,18 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
		},
	};

-	/* Modify the context image of pinned context with regs_context*/
-	err = intel_context_lock_pinned(ce);
-	if (err)
-		return err;
+	/* Modify the context image of pinned context with regs_context */
+	if (__valid_oactxctrl_offset(offset)) {

This check is not needed, if we didn't have valid a offset we should return
error from oa_get_render_ctx_id.

Hmm, I guess so.


+		err = intel_context_lock_pinned(ce);
+		if (err)
+			return err;

-	err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
-	intel_context_unlock_pinned(ce);
-	if (err)
-		return err;
+		err = gen8_modify_context(ce, regs_context,
+					  ARRAY_SIZE(regs_context));
+		intel_context_unlock_pinned(ce);
+		if (err)
+			return err;
+	}


	/* Apply regs_lri using LRI with pinned context */
	return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active);
@@ -2566,6 +2639,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
			   const struct i915_oa_config *oa_config,
			   struct i915_active *active)
 {
+	u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
	/* The MMIO offsets for Flex EU registers aren't contiguous */
	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
 #define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
@@ -2576,7 +2650,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
		},
		{
			GEN8_OACTXCONTROL,
-			stream->perf->ctx_oactxctrl_offset + 1,
+			ctx_oactxctrl + 1,
		},
		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
@@ -4545,6 +4619,37 @@ static void oa_init_supported_formats(struct i915_perf *perf)
	}
 }

+static void i915_perf_init_info(struct drm_i915_private *i915)
+{
+	struct i915_perf *perf = &i915->perf;
+
+	switch (GRAPHICS_VER(i915)) {
+	case 8:
+		perf->ctx_oactxctrl_offset = 0x120;
+		perf->ctx_flexeu0_offset = 0x2ce;
+		perf->gen8_valid_ctx_bit = BIT(25);
+		break;
+	case 9:
+		perf->ctx_oactxctrl_offset = 0x128;
+		perf->ctx_flexeu0_offset = 0x3de;
+		perf->gen8_valid_ctx_bit = BIT(16);
+		break;
+	case 11:
+		perf->ctx_oactxctrl_offset = 0x124;
+		perf->ctx_flexeu0_offset = 0x78e;
+		perf->gen8_valid_ctx_bit = BIT(16);
+		break;
+	case 12:
+		/*
+		 * Calculate offset at runtime in oa_pin_context for gen12 and
+		 * cache the value in perf->ctx_oactxctrl_offset.
+		 */

What about ctx_flexeu0_offset and gen8_valid_ctx_bit for Gen12+?

For gen12 ctx_flexeu0_offset are no longer part of the context image, so not present.

gen8_valid_ctx_bit is also not defined for gen12.

Thanks,
Umesh


Thanks.
--
Ashutosh



[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux