[PATCH 2/2] drm/i915: Engine relative MMIO for Gen12

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: John Harrison <John.C.Harrison@xxxxxxxxx>

Gen12 introduces a completely new and different scheme for
implementing engine relative MMIO accesses - MI_LRI_MMIO_REMAP. This
requires using the base address of instance zero of the relevant
engine class. And then, it is only valid if the register in
question falls within a certain range as specified by a table.

Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx>
CC: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    | 185 ++++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   7 +
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   9 +-
 drivers/gpu/drm/i915/i915_perf.c             |   3 +-
 4 files changed, 195 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index efe1c377d797..a65e8ccd9d8d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -236,6 +236,127 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
 	return bases[i].base;
 }
 
+static void lri_init_remap_base(struct intel_engine_cs *engine)
+{
+	struct intel_engine_cs *remap_engine;
+
+	engine->lri_mmio_base = 0;
+
+	if (INTEL_GEN(engine->i915) < 12)
+		return;
+
+	remap_engine = engine->i915->gt.engine_class[engine->class][0];
+	GEM_BUG_ON(!remap_engine);
+
+	engine->lri_mmio_base = remap_engine->mmio_base;
+}
+
+static void lri_add_range(struct intel_engine_cs *engine, u32 min, u32 max)
+{
+	GEM_BUG_ON(engine->lri_num_ranges >= INTEL_MAX_LRI_RANGES);
+
+	engine->lri_ranges[engine->lri_num_ranges].min = min;
+	engine->lri_ranges[engine->lri_num_ranges].max = max;
+	engine->lri_num_ranges++;
+}
+
+static void lri_init_remap_ranges(struct intel_engine_cs *engine)
+{
+	bool has_aux_tables = true;	/* Removed after TGL? */
+	u32 offset;
+
+	engine->lri_num_ranges = 0;
+
+	if (INTEL_GEN(engine->i915) < 12)
+		return;
+
+	switch (engine->class) {
+	case RENDER_CLASS:
+		/* Hardware Front End */
+		lri_add_range(engine, 0x000 + engine->mmio_base,
+			      0x7FF + engine->mmio_base);
+
+		/* TRTT */
+		lri_add_range(engine, 0x4400, 0x441F);
+
+		/* Aux Tables - REMOVEDBY(GEN:HAS:1406929672) */
+		if (has_aux_tables)
+			lri_add_range(engine, 0x4200, 0x420F);
+		break;
+
+	case VIDEO_DECODE_CLASS:
+		lri_add_range(engine, 0x0000 + engine->mmio_base,
+			      0x3FFF + engine->mmio_base);
+
+		/* TRTT */
+		offset = ((engine->instance & 0x1) * 0x20) +
+			 ((engine->instance >> 1) * 0x100);
+		lri_add_range(engine, 0x4420 + offset, 0x443F + offset);
+
+		/* Aux Tables - REMOVEDBY(GEN:HAS:1406929672) */
+		if (has_aux_tables) {
+			switch (engine->instance) {
+			case 0:
+				lri_add_range(engine, 0x4210, 0x421F);
+				break;
+
+			case 1:
+				lri_add_range(engine, 0x4220, 0x422F);
+				break;
+
+			case 2:
+				lri_add_range(engine, 0x4290, 0x429F);
+				break;
+
+			case 3:
+				lri_add_range(engine, 0x42A0, 0x42AF);
+				break;
+
+			default:
+				break;
+			}
+		}
+		break;
+
+	case VIDEO_ENHANCEMENT_CLASS:
+		lri_add_range(engine, 0x0000 + engine->mmio_base,
+			      0x3FFF + engine->mmio_base);
+
+		/* TRTT */
+		offset = engine->instance * 0x100;
+		lri_add_range(engine, 0x4460 + offset, 0x447F + offset);
+
+		/* Aux Tables - REMOVEDBY(GEN:HAS:1406929672) */
+		if (has_aux_tables) {
+			switch (engine->instance) {
+			case 0:
+				lri_add_range(engine, 0x4230, 0x423F);
+				break;
+
+			case 1:
+				lri_add_range(engine, 0x42B0, 0x42BF);
+				break;
+
+			case 2:
+				lri_add_range(engine, 0x4290, 0x429F);
+				break;
+
+			case 3:
+				// Same address as instance 1???
+				lri_add_range(engine, 0x42B0, 0x42BF);
+				break;
+
+			default:
+				break;
+			}
+		}
+		break;
+
+	default:
+		break;
+	}
+}
+
 static u32 i915_get_lri_cmd_legacy(const struct intel_engine_cs *engine,
 				   u32 word_count)
 {
@@ -249,6 +370,27 @@ static u32 i915_get_lri_cmd_add_offset(const struct intel_engine_cs *engine,
 	       MI_LRI_ADD_CS_MMIO_START_GEN11;
 }
 
+static u32 i915_get_lri_cmd_remap(const struct intel_engine_cs *engine,
+				  u32 word_count)
+{
+	u32 word;
+
+	word = __MI_LOAD_REGISTER_IMM(word_count);
+
+	/* if (lri_is_reg_in_remap_table(engine, reg)) ??? */
+		word |= MI_LRI_MMIO_REMAP_GEN12;
+
+	/*
+	 * NB: To gate this on the reg address will require knowing
+	 * all reg addresses in advance. This is not currently the
+	 * case as some LRI commands are built from multiple sources.
+	 * Also, what if some regs require remap and some do not?
+	 * The LRI command would need to be split into multiple pieces.
+	 */
+
+	return word;
+}
+
 static bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
 {
 	if (INTEL_GEN(engine->i915) < 11)
@@ -262,18 +404,53 @@ static bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
 
 static void lri_init(struct intel_engine_cs *engine)
 {
-	if (i915_engine_has_relative_lri(engine))
-		engine->get_lri_cmd = i915_get_lri_cmd_add_offset;
-	else
+	if (i915_engine_has_relative_lri(engine)) {
+		if (INTEL_GEN(engine->i915) < 12)
+			engine->get_lri_cmd = i915_get_lri_cmd_add_offset;
+		else {
+			engine->get_lri_cmd = i915_get_lri_cmd_remap;
+
+			lri_init_remap_base(engine);
+			lri_init_remap_ranges(engine);
+		}
+	} else
 		engine->get_lri_cmd = i915_get_lri_cmd_legacy;
 }
 
+static bool lri_is_reg_in_remap_table(const struct intel_engine_cs *engine,
+				      i915_reg_t reg)
+{
+	int i;
+	u32 offset = i915_mmio_reg_offset(reg);
+
+	for (i = 0; i < engine->lri_num_ranges; i++) {
+		if (offset < engine->lri_ranges[i].min)
+			continue;
+
+		if (offset > engine->lri_ranges[i].max)
+			continue;
+
+		return true;
+	}
+
+	return false;
+}
+
 u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg)
 {
 	if (!i915_engine_has_relative_lri(engine))
 		return i915_mmio_reg_offset(reg);
 
-	return i915_mmio_reg_offset(reg) - engine->mmio_base;
+	if (INTEL_GEN(engine->i915) < 12)
+		return i915_mmio_reg_offset(reg) - engine->mmio_base;
+
+	if (!WARN_ON(lri_is_reg_in_remap_table(engine, reg))) {
+		/* Is this meant to happen? */
+		return i915_mmio_reg_offset(reg);
+	}
+
+	return i915_mmio_reg_offset(reg) - engine->mmio_base +
+	       engine->lri_mmio_base;
 }
 
 static void __sprint_engine_name(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 7ca6c86a33f6..1e26f668e73b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -306,6 +306,13 @@ struct intel_engine_cs {
 	u32 context_size;
 	u32 mmio_base;
 
+#define INTEL_MAX_LRI_RANGES	3
+	struct lri_range {
+		u32 min, max;
+	} lri_ranges[INTEL_MAX_LRI_RANGES];
+	u32 lri_num_ranges;
+	u32 lri_mmio_base;
+
 	u32 (*get_lri_cmd)(const struct intel_engine_cs *engine,
 			   u32 word_count);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index eaa019df0ce7..0ee62a61d7b5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -130,14 +130,15 @@
  *   simply ignores the register load under certain conditions.
  * - One can actually load arbitrary many arbitrary registers: Simply issue x
  *   address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
- * - Newer hardware supports engine relative addressing but older hardware does
- *   not. This is required for hw engine load balancing. Hence the MI_LRI
- *   instruction itself is prefixed with '__' and should only be used on
- *   legacy hardware code paths. Generic code must always use the MI_LRI
+ * - Newer hardware supports engine relative addressing but using multiple
+ *   incompatible schemes. This is required for hw engine load balancing. Hence
+ *   the MI_LRI instruction itself is prefixed with '__' and should only be
+ *   used on legacy hardware code paths. Generic code must always use the MI_LRI
  *   and i915_get_lri_reg() helper functions instead.
  */
 #define __MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
 #define   MI_LRI_FORCE_POSTED		(1<<12)
+#define   MI_LRI_MMIO_REMAP_GEN12		(1<<17)
 #define   MI_LRI_ADD_CS_MMIO_START_GEN11	(1<<19)
 #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 83abdda05ba2..f88642209283 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1694,7 +1694,8 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream *stream,
 
 	/*
 	 * NB: The LRI instruction is generated by the hardware.
-	 * Should we read it in and assert that the offset flag is set?
+	 * Should we read it in and assert that the appropriate
+	 * offset flag is set?
 	 */
 
 	CTX_REG(ce->engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
-- 
2.21.0.5.gaeb582a983

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux