Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108656
References: https://bugs.freedesktop.org/show_bug.cgi?id=108714
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 2 -
drivers/gpu/drm/i915/intel_lrc.c | 140 ++++++++++++++--------------
2 files changed, 69 insertions(+), 73 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index add1fe7aeb93..62bde517d383 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1423,8 +1423,6 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
gen8_initialize_pd(vm, pd);
gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
-
- mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
}
ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b1f5db3442eb..c84bdc21bcce 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -363,31 +363,12 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
trace_i915_request_out(rq);
}
-static void
-execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
-{
- ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
- ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
- ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
- ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
-}
-
static u64 execlists_update_context(struct i915_request *rq)
{
- struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
struct intel_context *ce = rq->hw_context;
- u32 *reg_state = ce->lrc_reg_state;
- reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
-
- /*
- * True 32b PPGTT with dynamic page allocation: update PDP
- * registers and point the unallocated PDPs to scratch page.
- * PML4 is allocated during ppgtt init, so this is not needed
- * in 48-bit mode.
- */
- if (!i915_vm_is_48bit(&ppgtt->vm))
- execlists_update_context_pdps(ppgtt, reg_state);
+ ce->lrc_reg_state[CTX_RING_TAIL + 1] =
+ intel_ring_set_tail(rq->ring, rq->tail);
/*
* Make sure the context image is complete before we submit it to HW.
@@ -1247,6 +1228,59 @@ execlists_context_pin(struct intel_engine_cs *engine,
return __execlists_context_pin(engine, ctx, ce);
}
+static int emit_pdps(struct i915_request *rq)
+{
+ const struct intel_engine_cs * const engine = rq->engine;
+ struct i915_hw_ppgtt * const ppgtt = rq->gem_context->ppgtt;
+ int err, i;
+ u32 *cs;
+
+ GEM_BUG_ON(intel_vgpu_active(rq->i915));
+
+ /*
+ * Beware ye of the dragons, this sequence is magic!
+ *
+ * Small changes to this sequence can cause anything from
+ * GPU hangs to forcewake errors and machine lockups!
+ */
+
+ /* Flush any residual operations from the context load */
+ err = engine->emit_flush(rq, EMIT_FLUSH);
+ if (err)
+ return err;
+
+ /* Magic required to prevent forcewake errors! */
+ err = engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ return err;
+
+ cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Ensure the LRI have landed before we invalidate & continue */
+ *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+ for (i = GEN8_3LVL_PDPES; i--; ) {
+ const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
+ *cs++ = upper_32_bits(pd_daddr);
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
+ *cs++ = lower_32_bits(pd_daddr);
+ }
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
+ /* Be doubly sure the LRI have landed before proceeding */
+ err = engine->emit_flush(rq, EMIT_FLUSH);
+ if (err)
+ return err;
+
+ /* Re-invalidate the TLB for luck */
+ return engine->emit_flush(rq, EMIT_INVALIDATE);
+}
+
static int execlists_request_alloc(struct i915_request *request)
{
int ret;
@@ -1260,11 +1294,6 @@ static int execlists_request_alloc(struct i915_request *request)
*/
request->reserved_space += EXECLISTS_REQUEST_SIZE;
- /* Unconditionally invalidate GPU caches and TLBs. */
- ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
- if (ret)
- return ret;
-
/*
* Note that after this point, we have committed to using
* this request as it is being used to both track the
@@ -1273,6 +1302,14 @@ static int execlists_request_alloc(struct i915_request *request)
* to cancel/unwind this request now.
*/
+ /* Unconditionally invalidate GPU caches and TLBs. */
+ if (i915_vm_is_48bit(&request->gem_context->ppgtt->vm))
+ ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
+ else
+ ret = emit_pdps(request);
+ if (ret)
+ return ret;
+
request->reserved_space -= EXECLISTS_REQUEST_SIZE;
return 0;
}
@@ -1808,56 +1845,11 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
atomic_read(&execlists->tasklet.count));
}
-static int intel_logical_ring_emit_pdps(struct i915_request *rq)
-{
- struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
- struct intel_engine_cs *engine = rq->engine;
- const int num_lri_cmds = GEN8_3LVL_PDPES * 2;
- u32 *cs;
- int i;
-
- cs = intel_ring_begin(rq, num_lri_cmds * 2 + 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(num_lri_cmds);
- for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) {
- const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
-
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
- *cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
- *cs++ = lower_32_bits(pd_daddr);
- }
-
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
static int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
{
u32 *cs;
- int ret;
-
- /* Don't rely in hw updating PDPs, specially in lite-restore.
- * Ideally, we should set Force PD Restore in ctx descriptor,
- * but we can't. Force Restore would be a second option, but
- * it is unsafe in case of lite-restore (because the ctx is
- * not idle). PML4 is allocated during ppgtt init so this is
- * not needed in 48-bit.*/
- if ((intel_engine_flag(rq->engine) & rq->gem_context->ppgtt->pd_dirty_rings) &&
- !i915_vm_is_48bit(&rq->gem_context->ppgtt->vm) &&
- !intel_vgpu_active(rq->i915)) {
- ret = intel_logical_ring_emit_pdps(rq);
- if (ret)
- return ret;
-
- rq->gem_context->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
- }
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
@@ -1890,6 +1882,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
*cs++ = MI_NOOP;
+
intel_ring_advance(rq, cs);
return 0;
@@ -2501,6 +2494,11 @@ static void execlists_init_reg_state(u32 *regs,
* other PDP Descriptors are ignored.
*/
ASSIGN_CTX_PML4(ctx->ppgtt, regs);
+ } else {
+ ASSIGN_CTX_PDP(ctx->ppgtt, regs, 3);
+ ASSIGN_CTX_PDP(ctx->ppgtt, regs, 2);
+ ASSIGN_CTX_PDP(ctx->ppgtt, regs, 1);
+ ASSIGN_CTX_PDP(ctx->ppgtt, regs, 0);
}
if (rcs) {