On Tue, Apr 23, 2019 at 06:50:13PM -0700, John.C.Harrison@xxxxxxxxx wrote: > From: John Harrison <John.C.Harrison@xxxxxxxxx> > > With virtual engines, it is no longer possible to know which specific > physical engine a given request will be executed on at the time that > request is generated. This means that the request itself must be engine > agnostic - any direct register writes must be relative to the engine > and not absolute addresses. > > The LRI command has support for engine relative addressing. However, > the mechanism is not transparent to the driver. The scheme for Gen11 > (MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no > absolute engine base component. The hardware then adds on the correct > engine offset at execution time. > > Due to the non-trivial and differing schemes on different hardware, it > is not possible to simply update the code that creates the LRI > commands to set a remap flag and let the hardware get on with it. > Instead, this patch adds function wrappers for generating the LRI > command itself and then for constructing the correct address to use > with the LRI. > > v2: Fix build break in GVT. Remove flags parameter [review feedback > from Chris W]. > > v3: Fix build break in selftest. Rebase to newer base tree and fix > merge conflict. > > v4: More rebasing. Rmoved relative addressing support from Gen7-9 only > code paths [review feedback from Chris W]. First of all, would you have a rebased version after gt/ ? So, from my point of view v3 was better than this because this spread the __MI_LOAD_REGISTER_IMM everywhere. Maybe I just disagree with Chris and I'd prefer a single place like v3, but anyway we could probably arrive in an intermediate solution like: Couldn't we do in a way that we keep the MI_LRI without '__' and use this new function only on the paths needed? and maybe name this function gen11_get_lri_cmd? to make it clear that gen11+ needs to use this path. > > Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx> > --- > drivers/gpu/drm/i915/gvt/mmio_context.c | 16 +++- > drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +- > drivers/gpu/drm/i915/i915_gem_context.c | 12 +-- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +- > drivers/gpu/drm/i915/i915_perf.c | 19 +++-- > drivers/gpu/drm/i915/intel_engine_cs.c | 11 +++ > drivers/gpu/drm/i915/intel_gpu_commands.h | 6 +- > drivers/gpu/drm/i915/intel_lrc.c | 79 ++++++++++--------- > drivers/gpu/drm/i915/intel_lrc_reg.h | 4 +- > drivers/gpu/drm/i915/intel_mocs.c | 17 ++-- > drivers/gpu/drm/i915/intel_ringbuffer.c | 45 +++++++++-- > drivers/gpu/drm/i915/intel_ringbuffer.h | 4 + > drivers/gpu/drm/i915/intel_workarounds.c | 4 +- > .../drm/i915/selftests/intel_workarounds.c | 9 ++- > 14 files changed, 154 insertions(+), 79 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c > index e7e14c842be4..1b4d78e55ed6 100644 > --- a/drivers/gpu/drm/i915/gvt/mmio_context.c > +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c > @@ -199,14 +199,14 @@ restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu, > if (IS_ERR(cs)) > return PTR_ERR(cs); > > - *cs++ = MI_LOAD_REGISTER_IMM(count); > + *cs++ = i915_get_lri_cmd(req->engine, count); > for (mmio = gvt->engine_mmio_list.mmio; > i915_mmio_reg_valid(mmio->reg); mmio++) { > if (mmio->ring_id != ring_id || > !mmio->in_context) > continue; > > - *cs++ = i915_mmio_reg_offset(mmio->reg); > + *cs++ = i915_get_lri_reg(req->engine, mmio->reg); > *cs++ = vgpu_vreg_t(vgpu, mmio->reg) | > (mmio->mask << 16); > gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n", > @@ -234,7 +234,11 @@ restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu, > if (IS_ERR(cs)) > return PTR_ERR(cs); > > - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE); > + /* > + * GEN9_GFX_MOCS is not engine relative, therefore there is no > + * need for relative addressing. > + */ > + *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE); > > for (index = 0; index < GEN9_MOCS_SIZE; index++) { > *cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index)); > @@ -261,7 +265,11 @@ restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu, > if (IS_ERR(cs)) > return PTR_ERR(cs); > > - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2); > + /* > + * GEN9_LNCFCMOCS is not engine relative, therefore there is no > + * need for relative addressing. > + */ > + *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2); > > for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) { > *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index)); > diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c > index 503d548a55f7..91ebe18aacc6 100644 > --- a/drivers/gpu/drm/i915/i915_cmd_parser.c > +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c > @@ -220,7 +220,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { > CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), > CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ), > CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ), > - CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, > + CMD( __MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, > .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), > CMD( MI_STORE_REGISTER_MEM, SMI, F, 3, W | B, > .reg = { .offset = 1, .mask = 0x007FFFFC }, > @@ -1182,7 +1182,7 @@ static bool check_cmd(const struct intel_engine_cs *engine, > return false; > } > > - if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) && > + if (desc->cmd.value == __MI_LOAD_REGISTER_IMM(1) && > (offset + 2 > length || > (cmd[offset + 1] & reg->mask) != reg->value)) { > DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n", > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index dd728b26b5aa..f25dc613c266 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -1039,11 +1039,11 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) > if (IS_ERR(cs)) > return PTR_ERR(cs); > > - *cs++ = MI_LOAD_REGISTER_IMM(2); > + *cs++ = i915_get_lri_cmd(engine, 2); > > - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0)); > + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, 0)); > *cs++ = upper_32_bits(pd_daddr); > - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0)); > + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, 0)); > *cs++ = lower_32_bits(pd_daddr); > > *cs++ = MI_NOOP; > @@ -1053,13 +1053,13 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) > if (IS_ERR(cs)) > return PTR_ERR(cs); > > - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES); > + *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES); > for (i = GEN8_3LVL_PDPES; i--; ) { > const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); > > - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); > + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i)); > *cs++ = upper_32_bits(pd_daddr); > - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); > + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i)); > *cs++ = lower_32_bits(pd_daddr); > } > *cs++ = MI_NOOP; > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 3d672c9edb94..983801f481ba 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -1965,7 +1965,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) > if (IS_ERR(cs)) > return PTR_ERR(cs); > > - *cs++ = MI_LOAD_REGISTER_IMM(4); > + /* Gen7 only so no need to support relative offsets */ > + *cs++ = __MI_LOAD_REGISTER_IMM(4); > for (i = 0; i < 4; i++) { > *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); > *cs++ = 0; > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 39a4804091d7..10d5ab991908 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -1628,7 +1628,8 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) > * in the case that the OA unit has been disabled. > */ > static void > -gen8_update_reg_state_unlocked(struct intel_context *ce, > +gen8_update_reg_state_unlocked(struct intel_engine_cs *engine, > + struct intel_context *ce, > u32 *reg_state, > const struct i915_oa_config *oa_config) > { > @@ -1647,7 +1648,12 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, > }; > int i; > > - CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, > + /* > + * NB: The LRI instruction is generated by the hardware. > + * Should we read it in and assert that the offset flag is set? > + */ > + > + CTX_REG(engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, > (i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | > (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | > GEN8_OA_COUNTER_RESUME); > @@ -1674,10 +1680,10 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, > } > } > > - CTX_REG(reg_state, state_offset, flex_regs[i], value); > + CTX_REG(engine, reg_state, state_offset, flex_regs[i], value); > } > > - CTX_REG(reg_state, > + CTX_REG(engine, reg_state, > CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, > gen8_make_rpcs(i915, &ce->sseu)); > } > @@ -1752,7 +1758,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, > ce->state->obj->mm.dirty = true; > regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); > > - gen8_update_reg_state_unlocked(ce, regs, oa_config); > + gen8_update_reg_state_unlocked(engine, ce, regs, oa_config); > > i915_gem_object_unpin_map(ce->state->obj); > } > @@ -2146,7 +2152,8 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine, > > stream = engine->i915->perf.oa.exclusive_stream; > if (stream) > - gen8_update_reg_state_unlocked(ce, regs, stream->oa_config); > + gen8_update_reg_state_unlocked(engine, ce, regs, > + stream->oa_config); > } > > /** > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c > index eea9bec04f1b..ee33ce265820 100644 > --- a/drivers/gpu/drm/i915/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c > @@ -246,6 +246,17 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915, > return bases[i].base; > } > > +bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine) > +{ > + if (INTEL_GEN(engine->i915) < 11) > + return false; > + > + if (engine->id == BCS0) > + return false; > + > + return true; > +} > + > static void __sprint_engine_name(char *name, const struct engine_info *info) > { > WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u", > diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h > index a34ece53a771..e7784b3fb759 100644 > --- a/drivers/gpu/drm/i915/intel_gpu_commands.h > +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h > @@ -123,9 +123,13 @@ > * simply ignores the register load under certain conditions. > * - One can actually load arbitrary many arbitrary registers: Simply issue x > * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! > + * - Newer hardware supports engine relative addresses but older hardware does > + * not. So never call MI_LRI directly, always use the i915_get_lri_cmd() > + * and i915_get_lri_reg() helper functions. > */ > -#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) > +#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) > #define MI_LRI_FORCE_POSTED (1<<12) > +#define MI_LRI_ADD_CS_MMIO_START_GEN11 (1<<19) > #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) > #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) > #define MI_SRM_LRM_GLOBAL_GTT (1<<22) > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 4e0a351bfbca..41cbbcd9f0dd 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -1383,14 +1383,15 @@ static int emit_pdps(struct i915_request *rq) > return PTR_ERR(cs); > > /* Ensure the LRI have landed before we invalidate & continue */ > - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; > + *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES) | > + MI_LRI_FORCE_POSTED; > for (i = GEN8_3LVL_PDPES; i--; ) { > const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); > u32 base = engine->mmio_base; > > - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); > + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i)); > *cs++ = upper_32_bits(pd_daddr); > - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); > + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i)); > *cs++ = lower_32_bits(pd_daddr); > } > *cs++ = MI_NOOP; > @@ -1464,7 +1465,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) > *batch++ = i915_scratch_offset(engine->i915) + 256; > *batch++ = 0; > > - *batch++ = MI_LOAD_REGISTER_IMM(1); > + /* Gen8/9 only so no need to support relative offsets */ > + *batch++ = __MI_LOAD_REGISTER_IMM(1); > *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); > *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES; > > @@ -1535,13 +1537,14 @@ struct lri { > u32 value; > }; > > -static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count) > +static u32 *emit_lri(struct intel_engine_cs *engine, u32 *batch, > + const struct lri *lri, unsigned int count) > { > GEM_BUG_ON(!count || count > 63); > > - *batch++ = MI_LOAD_REGISTER_IMM(count); > + *batch++ = i915_get_lri_cmd(engine, count); > do { > - *batch++ = i915_mmio_reg_offset(lri->reg); > + *batch++ = i915_get_lri_reg(engine, lri->reg); > *batch++ = lri->value; > } while (lri++, --count); > *batch++ = MI_NOOP; > @@ -1579,7 +1582,7 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) > /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ > batch = gen8_emit_flush_coherentl3_wa(engine, batch); > > - batch = emit_lri(batch, lri, ARRAY_SIZE(lri)); > + batch = emit_lri(engine, batch, lri, ARRAY_SIZE(lri)); > > /* WaMediaPoolStateCmdInWABB:bxt,glk */ > if (HAS_POOLED_EU(engine->i915)) { > @@ -2728,10 +2731,10 @@ static void execlists_init_reg_state(u32 *regs, > * values (including all the missing MI_LOAD_REGISTER_IMM commands that > * we are not initializing here). > */ > - regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) | > - MI_LRI_FORCE_POSTED; > + regs[CTX_LRI_HEADER_0] = i915_get_lri_cmd(engine, rcs ? 14 : 11) | > + MI_LRI_FORCE_POSTED; > > - CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base), > + CTX_REG(engine, regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base), > _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | > _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH)); > if (INTEL_GEN(engine->i915) < 11) { > @@ -2739,22 +2742,23 @@ static void execlists_init_reg_state(u32 *regs, > _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | > CTX_CTRL_RS_CTX_ENABLE); > } > - CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); > - CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0); > - CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0); > - CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base), > + CTX_REG(engine, regs, CTX_RING_HEAD, RING_HEAD(base), 0); > + CTX_REG(engine, regs, CTX_RING_TAIL, RING_TAIL(base), 0); > + CTX_REG(engine, regs, CTX_RING_BUFFER_START, RING_START(base), 0); > + CTX_REG(engine, regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base), > RING_CTL_SIZE(ring->size) | RING_VALID); > - CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0); > - CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0); > - CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT); > - CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); > - CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); > - CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); > + CTX_REG(engine, regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0); > + CTX_REG(engine, regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0); > + CTX_REG(engine, regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT); > + CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); > + CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); > + CTX_REG(engine, regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); > if (rcs) { > struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; > > - CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0); > - CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET, > + CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX, > + RING_INDIRECT_CTX(base), 0); > + CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX_OFFSET, > RING_INDIRECT_CTX_OFFSET(base), 0); > if (wa_ctx->indirect_ctx.size) { > u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); > @@ -2767,7 +2771,8 @@ static void execlists_init_reg_state(u32 *regs, > intel_lr_indirect_ctx_offset(engine) << 6; > } > > - CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0); > + CTX_REG(engine, regs, CTX_BB_PER_CTX_PTR, > + RING_BB_PER_CTX_PTR(base), 0); > if (wa_ctx->per_ctx.size) { > u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); > > @@ -2776,18 +2781,19 @@ static void execlists_init_reg_state(u32 *regs, > } > } > > - regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; > + regs[CTX_LRI_HEADER_1] = i915_get_lri_cmd(engine, 9) | > + MI_LRI_FORCE_POSTED; > > - CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); > + CTX_REG(engine, regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); > /* PDP values well be assigned later if needed */ > - CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0); > - CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0); > - CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0); > - CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0); > - CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0); > - CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0); > - CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0); > - CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0); > + CTX_REG(engine, regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0); > + CTX_REG(engine, regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0); > + CTX_REG(engine, regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0); > + CTX_REG(engine, regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0); > + CTX_REG(engine, regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0); > + CTX_REG(engine, regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0); > + CTX_REG(engine, regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0); > + CTX_REG(engine, regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0); > > if (i915_vm_is_4lvl(&ppgtt->vm)) { > /* 64b PPGTT (48bit canonical) > @@ -2803,8 +2809,9 @@ static void execlists_init_reg_state(u32 *regs, > } > > if (rcs) { > - regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); > - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); > + regs[CTX_LRI_HEADER_2] = i915_get_lri_cmd(engine, 1); > + CTX_REG(engine, regs, CTX_R_PWR_CLK_STATE, > + GEN8_R_PWR_CLK_STATE, 0); > > i915_oa_init_reg_state(engine, ce, regs); > } > diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/intel_lrc_reg.h > index 5ef932d810a7..40b1142d0d74 100644 > --- a/drivers/gpu/drm/i915/intel_lrc_reg.h > +++ b/drivers/gpu/drm/i915/intel_lrc_reg.h > @@ -39,10 +39,10 @@ > #define CTX_R_PWR_CLK_STATE 0x42 > #define CTX_END 0x44 > > -#define CTX_REG(reg_state, pos, reg, val) do { \ > +#define CTX_REG(engine, reg_state, pos, reg, val) do { \ > u32 *reg_state__ = (reg_state); \ > const u32 pos__ = (pos); \ > - (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \ > + (reg_state__)[(pos__) + 0] = i915_get_lri_reg((engine), (reg)); \ > (reg_state__)[(pos__) + 1] = (val); \ > } while (0) > > diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c > index 274ba78500c0..bb11d0f68bba 100644 > --- a/drivers/gpu/drm/i915/intel_mocs.c > +++ b/drivers/gpu/drm/i915/intel_mocs.c > @@ -322,9 +322,6 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table, > /** > * intel_mocs_init_engine() - emit the mocs control table > * @engine: The engine for whom to emit the registers. > - * > - * This function simply emits a MI_LOAD_REGISTER_IMM command for the > - * given table starting at the given address. > */ > void intel_mocs_init_engine(struct intel_engine_cs *engine) > { > @@ -378,18 +375,20 @@ static int emit_mocs_control_table(struct i915_request *rq, > if (IS_ERR(cs)) > return PTR_ERR(cs); > > - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries); > + *cs++ = i915_get_lri_cmd(rq->engine, table->n_entries); > > for (index = 0; index < table->size; index++) { > u32 value = get_entry_control(table, index); > > - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); > + *cs++ = i915_get_lri_reg(rq->engine, > + mocs_register(engine, index)); > *cs++ = value; > } > > /* All remaining entries are also unused */ > for (; index < table->n_entries; index++) { > - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); > + *cs++ = i915_get_lri_reg(rq->engine, > + mocs_register(engine, index)); > *cs++ = unused_value; > } > > @@ -447,7 +446,11 @@ static int emit_mocs_l3cc_table(struct i915_request *rq, > if (IS_ERR(cs)) > return PTR_ERR(cs); > > - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2); > + /* > + * GEN9_LNCFCMOCS is not engine relative, therefore there is no > + * need for relative addressing? > + */ > + *cs++ = __MI_LOAD_REGISTER_IMM(table->n_entries / 2); > > for (i = 0; i < table->size / 2; i++) { > u16 low = get_entry_l3cc(table, 2 * i); > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 3844581f622c..107ed7c0d1fa 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -1592,12 +1592,13 @@ static int load_pd_dir(struct i915_request *rq, > if (IS_ERR(cs)) > return PTR_ERR(cs); > > - *cs++ = MI_LOAD_REGISTER_IMM(1); > - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)); > + /* Can these not be merged into a single LRI??? */ > + *cs++ = i915_get_lri_cmd(engine, 1); > + *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_DCLV(engine->mmio_base)); > *cs++ = PP_DIR_DCLV_2G; > > - *cs++ = MI_LOAD_REGISTER_IMM(1); > - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); > + *cs++ = i915_get_lri_cmd(engine, 1); > + *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_BASE(engine->mmio_base)); > *cs++ = ppgtt->pd.base.ggtt_offset << 10; > > intel_ring_advance(rq, cs); > @@ -1662,7 +1663,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) > if (num_engines) { > struct intel_engine_cs *signaller; > > - *cs++ = MI_LOAD_REGISTER_IMM(num_engines); > + /* > + * Must use absolute engine address as the register > + * write is targeting a different engine. > + */ > + *cs++ = __MI_LOAD_REGISTER_IMM(num_engines); > for_each_engine(signaller, i915, id) { > if (signaller == engine) > continue; > @@ -1708,7 +1713,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) > struct intel_engine_cs *signaller; > i915_reg_t last_reg = {}; /* keep gcc quiet */ > > - *cs++ = MI_LOAD_REGISTER_IMM(num_engines); > + /* > + * Must use absolute engine address as the register > + * write is targeting a different engine. > + */ > + *cs++ = __MI_LOAD_REGISTER_IMM(num_engines); > for_each_engine(signaller, i915, id) { > if (signaller == engine) > continue; > @@ -1750,9 +1759,9 @@ static int remap_l3(struct i915_request *rq, int slice) > * here because no other code should access these registers other than > * at initialization time. > */ > - *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4); > + *cs++ = i915_get_lri_cmd(rq->engine, GEN7_L3LOG_SIZE/4); > for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { > - *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); > + *cs++ = i915_get_lri_reg(rq->engine, GEN7_L3LOG(slice, i)); > *cs++ = remap_info[i]; > } > *cs++ = MI_NOOP; > @@ -2337,3 +2346,23 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) > > return intel_init_ring_buffer(engine); > } > + > +u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count) > +{ > + u32 word; > + > + word = __MI_LOAD_REGISTER_IMM(word_count); > + > + if (i915_engine_has_relative_lri(engine)) > + word |= MI_LRI_ADD_CS_MMIO_START_GEN11; > + > + return word; > +} > + > +u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg) > +{ > + if (!i915_engine_has_relative_lri(engine)) > + return i915_mmio_reg_offset(reg); > + > + return i915_mmio_reg_offset(reg) - engine->mmio_base; > +} > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 72c7c337ace9..261b3c433069 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -580,4 +580,8 @@ intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine) > return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK); > } > > +bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine); > +u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count); > +u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg); > + > #endif /* _INTEL_RINGBUFFER_H_ */ > diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c > index b3cbed1ee1c9..a50c47993c88 100644 > --- a/drivers/gpu/drm/i915/intel_workarounds.c > +++ b/drivers/gpu/drm/i915/intel_workarounds.c > @@ -629,9 +629,9 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) > if (IS_ERR(cs)) > return PTR_ERR(cs); > > - *cs++ = MI_LOAD_REGISTER_IMM(wal->count); > + *cs++ = i915_get_lri_cmd(rq->engine, wal->count); > for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { > - *cs++ = i915_mmio_reg_offset(wa->reg); > + *cs++ = i915_get_lri_reg(rq->engine, wa->reg); > *cs++ = wa->val; > } > *cs++ = MI_NOOP; > diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c > index a363748a7a4f..dbe3cd4d4981 100644 > --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c > +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c > @@ -444,6 +444,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, > > for (i = 0; i < engine->whitelist.count; i++) { > u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg); > + u32 regLRI = i915_get_lri_reg(engine, engine->whitelist.list[i].reg); > u64 addr = scratch->node.start; > struct i915_request *rq; > u32 srm, lrm, rsvd; > @@ -476,8 +477,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, > idx = 1; > for (v = 0; v < ARRAY_SIZE(values); v++) { > /* LRI garbage */ > - *cs++ = MI_LOAD_REGISTER_IMM(1); > - *cs++ = reg; > + *cs++ = i915_get_lri_cmd(engine, 1); > + *cs++ = regLRI; > *cs++ = values[v]; > > /* SRM result */ > @@ -489,8 +490,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, > } > for (v = 0; v < ARRAY_SIZE(values); v++) { > /* LRI garbage */ > - *cs++ = MI_LOAD_REGISTER_IMM(1); > - *cs++ = reg; > + *cs++ = i915_get_lri_cmd(engine, 1); > + *cs++ = regLRI; > *cs++ = ~values[v]; > > /* SRM result */ > -- > 2.21.0.5.gaeb582a983 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx