From: John Harrison <John.C.Harrison@xxxxxxxxx>

Gen12 introduces a completely new and different scheme for
implementing engine relative MMIO accesses - MI_LRI_MMIO_REMAP. This
requires using the base address of instance zero of the relevant
engine class. And then, it is only valid if the register in
question falls within a certain range as specified by a table.

Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx>
CC: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    | 211 ++++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   8 +
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   9 +-
 drivers/gpu/drm/i915/i915_perf.c             |   3 +-
 4 files changed, 222 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index efe1c377d797..a65e8ccd9d8d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -236,6 +236,138 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
 	return bases[i].base;
 }
 
+/*
+ * Record the MMIO base that Gen12 MI_LRI_MMIO_REMAP addressing is relative
+ * to: the base of instance zero of this engine's class (zero before Gen12).
+ */
+static void lri_init_remap_base(struct intel_engine_cs *engine)
+{
+	struct intel_engine_cs *remap_engine;
+
+	engine->lri_mmio_base = 0;
+
+	if (INTEL_GEN(engine->i915) < 12)
+		return;
+
+	remap_engine = engine->i915->gt.engine_class[engine->class][0];
+	GEM_BUG_ON(!remap_engine);
+
+	engine->lri_mmio_base = remap_engine->mmio_base;
+}
+
+/* Append the inclusive register range [min, max] to the remap table. */
+static void lri_add_range(struct intel_engine_cs *engine, u32 min, u32 max)
+{
+	GEM_BUG_ON(engine->lri_num_ranges >= INTEL_MAX_LRI_RANGES);
+
+	engine->lri_ranges[engine->lri_num_ranges].min = min;
+	engine->lri_ranges[engine->lri_num_ranges].max = max;
+	engine->lri_num_ranges++;
+}
+
+/*
+ * Build the per-engine table of register ranges that may be remapped on
+ * Gen12+. Ranges depend on engine class and, for the TRTT and aux table
+ * ranges of the video engines, on the engine instance.
+ */
+static void lri_init_remap_ranges(struct intel_engine_cs *engine)
+{
+	bool has_aux_tables = true; /* Removed after TGL? */
+	u32 offset;
+
+	engine->lri_num_ranges = 0;
+
+	if (INTEL_GEN(engine->i915) < 12)
+		return;
+
+	switch (engine->class) {
+	case RENDER_CLASS:
+		/* Hardware Front End */
+		lri_add_range(engine, 0x000 + engine->mmio_base,
+			      0x7FF + engine->mmio_base);
+
+		/* TRTT */
+		lri_add_range(engine, 0x4400, 0x441F);
+
+		/* Aux Tables - REMOVEDBY(GEN:HAS:1406929672) */
+		if (has_aux_tables)
+			lri_add_range(engine, 0x4200, 0x420F);
+		break;
+
+	case VIDEO_DECODE_CLASS:
+		lri_add_range(engine, 0x0000 + engine->mmio_base,
+			      0x3FFF + engine->mmio_base);
+
+		/* TRTT */
+		offset = ((engine->instance & 0x1) * 0x20) +
+			 ((engine->instance >> 1) * 0x100);
+		lri_add_range(engine, 0x4420 + offset, 0x443F + offset);
+
+		/* Aux Tables - REMOVEDBY(GEN:HAS:1406929672) */
+		if (has_aux_tables) {
+			switch (engine->instance) {
+			case 0:
+				lri_add_range(engine, 0x4210, 0x421F);
+				break;
+
+			case 1:
+				lri_add_range(engine, 0x4220, 0x422F);
+				break;
+
+			case 2:
+				lri_add_range(engine, 0x4290, 0x429F);
+				break;
+
+			case 3:
+				lri_add_range(engine, 0x42A0, 0x42AF);
+				break;
+
+			default:
+				break;
+			}
+		}
+		break;
+
+	case VIDEO_ENHANCEMENT_CLASS:
+		lri_add_range(engine, 0x0000 + engine->mmio_base,
+			      0x3FFF + engine->mmio_base);
+
+		/* TRTT */
+		offset = engine->instance * 0x100;
+		lri_add_range(engine, 0x4460 + offset, 0x447F + offset);
+
+		/* Aux Tables - REMOVEDBY(GEN:HAS:1406929672) */
+		if (has_aux_tables) {
+			switch (engine->instance) {
+			case 0:
+				lri_add_range(engine, 0x4230, 0x423F);
+				break;
+
+			case 1:
+				lri_add_range(engine, 0x42B0, 0x42BF);
+				break;
+
+			case 2:
+				/* NOTE(review): same range as VCS instance 2 - confirm */
+				lri_add_range(engine, 0x4290, 0x429F);
+				break;
+
+			case 3:
+				/* NOTE(review): same range as instance 1 - confirm */
+				lri_add_range(engine, 0x42B0, 0x42BF);
+				break;
+
+			default:
+				break;
+			}
+		}
+		break;
+
+	default:
+		break;
+	}
+}
+
 static u32 i915_get_lri_cmd_legacy(const struct intel_engine_cs *engine,
 				   u32 word_count)
 {
@@ -249,6 +381,31 @@ static u32 i915_get_lri_cmd_add_offset(const struct intel_engine_cs *engine,
 	       MI_LRI_ADD_CS_MMIO_START_GEN11;
 }
 
+/*
+ * Gen12 flavour of the LRI header: always sets MI_LRI_MMIO_REMAP_GEN12,
+ * regardless of which registers the command goes on to load.
+ */
+static u32 i915_get_lri_cmd_remap(const struct intel_engine_cs *engine,
+				  u32 word_count)
+{
+	u32 word;
+
+	word = __MI_LOAD_REGISTER_IMM(word_count);
+
+	/* if (lri_is_reg_in_remap_table(engine, reg)) ??? */
+	word |= MI_LRI_MMIO_REMAP_GEN12;
+
+	/*
+	 * NB: To gate this on the reg address will require knowing
+	 * all reg addresses in advance. This is not currently the
+	 * case as some LRI commands are built from multiple sources.
+	 * Also, what if some regs require remap and some do not?
+	 * The LRI command would need to be split into multiple pieces.
+	 */
+
+	return word;
+}
+
 static bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
 {
 	if (INTEL_GEN(engine->i915) < 11)
@@ -262,18 +419,64 @@ static bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
 
+/* Select the LRI command builder matching the engine's addressing scheme. */
 static void lri_init(struct intel_engine_cs *engine)
 {
-	if (i915_engine_has_relative_lri(engine))
-		engine->get_lri_cmd = i915_get_lri_cmd_add_offset;
-	else
+	if (i915_engine_has_relative_lri(engine)) {
+		if (INTEL_GEN(engine->i915) < 12)
+			engine->get_lri_cmd = i915_get_lri_cmd_add_offset;
+		else {
+			engine->get_lri_cmd = i915_get_lri_cmd_remap;
+
+			lri_init_remap_base(engine);
+			lri_init_remap_ranges(engine);
+		}
+	} else
 		engine->get_lri_cmd = i915_get_lri_cmd_legacy;
 }
 
+/* Return true if reg lies within any of the engine's remappable ranges. */
+static bool lri_is_reg_in_remap_table(const struct intel_engine_cs *engine,
+				      i915_reg_t reg)
+{
+	int i;
+	u32 offset = i915_mmio_reg_offset(reg);
+
+	for (i = 0; i < engine->lri_num_ranges; i++) {
+		if (offset < engine->lri_ranges[i].min)
+			continue;
+
+		if (offset > engine->lri_ranges[i].max)
+			continue;
+
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Translate reg into the offset to emit after a header from get_lri_cmd():
+ * the absolute offset without relative addressing, the offset from the
+ * engine's own mmio_base before Gen12, and on Gen12+ that offset rebased
+ * onto lri_mmio_base for registers in the remap table.
+ *
+ * NOTE(review): the TRTT and aux table ranges are not expressed relative
+ * to mmio_base - confirm the rebasing is correct for them.
+ */
 u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg)
 {
 	if (!i915_engine_has_relative_lri(engine))
 		return i915_mmio_reg_offset(reg);
 
-	return i915_mmio_reg_offset(reg) - engine->mmio_base;
+	if (INTEL_GEN(engine->i915) < 12)
+		return i915_mmio_reg_offset(reg) - engine->mmio_base;
+
+	if (WARN_ON(!lri_is_reg_in_remap_table(engine, reg))) {
+		/* Outside the remap table: emit the absolute offset */
+		return i915_mmio_reg_offset(reg);
+	}
+
+	return i915_mmio_reg_offset(reg) - engine->mmio_base +
+	       engine->lri_mmio_base;
 }
 
 static void __sprint_engine_name(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 7ca6c86a33f6..1e26f668e73b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -306,6 +306,14 @@ struct intel_engine_cs {
 	u32 context_size;
 	u32 mmio_base;
 
+	/* Gen12+ LRI remap: inclusive register ranges and the rebase target */
+#define INTEL_MAX_LRI_RANGES	3
+	struct lri_range {
+		u32 min, max;
+	} lri_ranges[INTEL_MAX_LRI_RANGES];
+	u32 lri_num_ranges;
+	u32 lri_mmio_base;
+
 	u32 (*get_lri_cmd)(const struct intel_engine_cs *engine,
 			   u32 word_count);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index eaa019df0ce7..0ee62a61d7b5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -130,14 +130,15 @@
  *   simply ignores the register load under certain conditions.
  * - One can actually load arbitrary many arbitrary registers: Simply issue x
  *   address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
- * - Newer hardware supports engine relative addressing but older hardware does
- *   not. This is required for hw engine load balancing. Hence the MI_LRI
- *   instruction itself is prefixed with '__' and should only be used on
- *   legacy hardware code paths. Generic code must always use the MI_LRI
+ * - Newer hardware supports engine relative addressing but using multiple
+ *   incompatible schemes. This is required for hw engine load balancing. Hence
+ *   the MI_LRI instruction itself is prefixed with '__' and should only be
+ *   used on legacy hardware code paths. Generic code must always use the MI_LRI
  *   and i915_get_lri_reg() helper functions instead.
  */
 #define __MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
 #define   MI_LRI_FORCE_POSTED		(1<<12)
+#define   MI_LRI_MMIO_REMAP_GEN12		(1<<17)
 #define   MI_LRI_ADD_CS_MMIO_START_GEN11	(1<<19)
 #define MI_STORE_REGISTER_MEM	MI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8	MI_INSTR(0x24, 2)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 83abdda05ba2..f88642209283 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1694,7 +1694,8 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream *stream,
 	/*
 	 * NB: The LRI instruction is generated by the hardware.
-	 * Should we read it in and assert that the offset flag is set?
+	 * Should we read it in and assert that the appropriate
+	 * offset flag is set?
 	 */
 	CTX_REG(ce->engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
-- 
2.21.0.5.gaeb582a983

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx