On 24/09/2019 00:51, John.C.Harrison@xxxxxxxxx wrote:
From: John Harrison <John.C.Harrison@xxxxxxxxx>
Gen12 introduces a completely new and different scheme for
implementing engine relative MMIO accesses - MI_LRI_MMIO_REMAP. This
requires using the base address of instance zero of the relevant
engine class. And then, it is only valid if the register in
question falls within a certain range as specified by a table.
v2: Add support for fused parts where instance zero of a given engine
class may be missing. Make aux_tables presence a device flag rather
than just hard coded. Pre-calculate the correct LRI base address
rather than using a per-instance base and then adding on a delta to
the correct base later. Make all the table range checking a debug only
feature - the theory is that all driver access should be within the
remap ranges. [Daniele]
v3: Re-base on Mika's changes. This removes all the abstraction layer.
Which also means there is no common code path that all LRI accesses go
via. Thus it is not possible to implement the range check. However, as
noted in v2, the range check should not be necessary anyway.
Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx>
CC: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx>
---
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 27 +++++++++++++++++---
drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 +
drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 9 ++++---
drivers/gpu/drm/i915/gt/intel_mocs.c | 6 ++---
drivers/gpu/drm/i915/i915_perf.c | 3 ++-
drivers/gpu/drm/i915/intel_device_info.c | 14 ++++++++++
drivers/gpu/drm/i915/intel_device_info.h | 1 +
7 files changed, 49 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 5aa58e069806..09fbbffce419 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -247,14 +247,32 @@ static bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
return true;
}
-static void lri_init(struct intel_engine_cs *engine)
+static void lri_init(struct intel_engine_cs *engine, u32 first_instance)
{
if (i915_engine_has_relative_lri(engine)) {
- engine->lri_cmd_mode = MI_LRI_ADD_CS_MMIO_START_GEN11;
- engine->lri_mmio_base = 0;
+ if (INTEL_GEN(engine->i915) < 12) {
+ engine->lri_cmd_mode = MI_LRI_ADD_CS_MMIO_START_GEN11;
+ engine->lri_mmio_base = 0;//engine->mmio_base;
No C++ style comments please.
+ engine->lri_engine = engine;
+ } else {
+ engine->lri_cmd_mode = MI_LRI_MMIO_REMAP_GEN12;
+
+ engine->lri_engine = engine->gt->engine_class
+ [engine->class][first_instance];
+ GEM_BUG_ON(!engine->lri_engine);
+ engine->lri_mmio_base = engine->lri_engine->mmio_base;
+
+ /* According to the bspec, accesses should be compared
Preferred style:
/*
* Blah
*/
+ * against the remap table and remapping only enabled
+ * if found. In practice, all driver accesses should be
+ * to remap registers. So, no need for the complication
+ * of checking against any device specific tables.
+ */
+ }
} else {
engine->lri_cmd_mode = 0;
engine->lri_mmio_base = engine->mmio_base;
+ engine->lri_engine = engine;
}
If it helps at all you could save one indentation level by doing:
if (!supports_lri) {
} else if (GEN == 11) {
} else {
}
}
@@ -342,7 +360,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
/* Nothing to do here, execute in order of dependencies */
engine->schedule = NULL;
- lri_init(engine);
+ lri_init(engine,
+ INTEL_INFO(engine->i915)->first_instance[engine->class]);
seqlock_init(&engine->stats.lock);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 38f486f7f7e3..62caa74e8558 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -308,6 +308,7 @@ struct intel_engine_cs {
u32 lri_mmio_base;
u32 lri_cmd_mode;
+ struct intel_engine_cs *lri_engine;
u32 uabi_capabilities;
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index bfb51d8d7ce2..9c5be384026f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -133,15 +133,16 @@
* simply ignores the register load under certain conditions.
* - One can actually load arbitrary many arbitrary registers: Simply issue x
* address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
- * - Newer hardware supports engine relative addressing but older hardware does
- * not. This is required for hw engine load balancing. Hence the MI_LRI
- * instruction itself is prefixed with '__' and should only be used on
- * legacy hardware code paths. Generic code must always use the MI_LRI
+ * - Newer hardware supports engine relative addressing but using multiple
+ * incompatible schemes. This is required for hw engine load balancing. Hence
+ * the MI_LRI instruction itself is prefixed with '__' and should only be
Is the double underscore reference out of date now?
+ * used on legacy hardware code paths. Generic code must always use the MI_LRI
* and i915_get_lri_reg() helper functions instead.
*/
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_LRI_MMIO_REMAP_GEN12 BIT(17)
#define MI_LRI_ADD_CS_MMIO_START_GEN11 BIT(19)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 8e8fe3deb95c..e121fea5b33c 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -438,7 +438,7 @@ static void intel_mocs_init_global(struct intel_gt *gt)
static int emit_mocs_control_table(struct i915_request *rq,
const struct drm_i915_mocs_table *table)
{
- enum intel_engine_id engine = rq->engine->id;
+ enum intel_engine_id engine_id = rq->engine->lri_engine->id;
Observation: If you don't rename the local diff is smaller.
unsigned int index;
u32 unused_value;
u32 *cs;
@@ -459,13 +459,13 @@ static int emit_mocs_control_table(struct i915_request *rq,
for (index = 0; index < table->size; index++) {
u32 value = get_entry_control(table, index);
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_mmio_reg_offset(mocs_register(engine_id, index));
*cs++ = value;
}
/* All remaining entries are also unused */
for (; index < table->n_entries; index++) {
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_mmio_reg_offset(mocs_register(engine_id, index));
*cs++ = unused_value;
}
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 8b85cdfada21..fd628ae12343 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1695,7 +1695,8 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream *stream,
/*
* NB: The LRI instruction is generated by the hardware.
- * Should we read it in and assert that the offset flag is set?
+ * Should we read it in and assert that the appropriate
+ * offset flag is set?
*/
CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 85e480bdc673..dece012d3ad4 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -1030,6 +1030,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
u32 media_fuse;
u16 vdbox_mask;
u16 vebox_mask;
+ bool found;
if (INTEL_GEN(dev_priv) < 11)
return;
@@ -1040,6 +1041,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
GEN11_GT_VEBOX_DISABLE_SHIFT;
+ found = false;
for (i = 0; i < I915_MAX_VCS; i++) {
if (!HAS_ENGINE(dev_priv, _VCS(i))) {
vdbox_mask &= ~BIT(i);
@@ -1059,11 +1061,17 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
*/
if (IS_TIGERLAKE(dev_priv) || logical_vdbox++ % 2 == 0)
RUNTIME_INFO(dev_priv)->vdbox_sfc_access |= BIT(i);
+
+ if (!found) {
+ found = true;
+ info->first_instance[VIDEO_DECODE_CLASS] = i;
+ }
}
DRM_DEBUG_DRIVER("vdbox enable: %04x, instances: %04lx\n",
vdbox_mask, VDBOX_MASK(dev_priv));
GEM_BUG_ON(vdbox_mask != VDBOX_MASK(dev_priv));
+ found = false;
for (i = 0; i < I915_MAX_VECS; i++) {
if (!HAS_ENGINE(dev_priv, _VECS(i))) {
vebox_mask &= ~BIT(i);
@@ -1073,6 +1081,12 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
if (!(BIT(i) & vebox_mask)) {
info->engine_mask &= ~BIT(_VECS(i));
DRM_DEBUG_DRIVER("vecs%u fused off\n", i);
+ continue;
+ }
+
+ if (!found) {
+ found = true;
+ info->first_instance[VIDEO_ENHANCEMENT_CLASS] = i;
}
}
DRM_DEBUG_DRIVER("vebox enable: %04x, instances: %04lx\n",
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 0cdc2465534b..75a9950969fb 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -152,6 +152,7 @@ struct intel_device_info {
u8 gen;
u8 gt; /* GT number, 0 if undefined */
intel_engine_mask_t engine_mask; /* Engines supported by the HW */
+ u32 first_instance[MAX_ENGINE_CLASS]; /* instance 0 might be fused */
u32 is overkill however do you even need to store this persistently?
Looks like all that is needed at runtime is engine->lri_engine? So you
could lookup the first instance from intel_engine_setup. Or if I missed
something and you do need it at runtime, then should go to runtime info
since we want to avoid dependencies on mkwrite_device_info where possible.
enum intel_platform platform;
Regards,
Tvrtko
P.S. Just a superficial scan, not a normal review.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx