[PATCH] drm/i915/execlists: Reset RING registers upon resume

Eric Blau <eblau@xxxxxxxxx> · Mon, 6 Feb 2017 15:39:09 -0500

commit bafb2f7d4755bf1571bd5e9a03b97f3fc4fe69ae
Author: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Date:   Wed Sep 21 14:51:08 2016 +0100

    drm/i915/execlists: Reset RING registers upon resume

    There is a disparity in the context image saved to disk and our own
    bookkeeping - that is we presume the RING_HEAD and RING_TAIL match our
    stored ce->ring->tail value. However, as we emit WA_TAIL_DWORDS into the
    ring but may not tell the GPU about them, the GPU may be lagging behind
    our bookkeeping. Upon hibernation we do not save stolen pages, presuming
    that their contents are volatile. This means that although we start
    writing into the ring at tail, the GPU starts executing from its HEAD
    and there may be some garbage in between and so the GPU promptly hangs
    upon resume.

    Testcase: igt/gem_exec_suspend/basic-S4
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96526
    Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
    Reviewed-by: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
    Link: http://patchwork.freedesktop.org/patch/msgid/20160921135108.29574-3-chris@xxxxxxxxxxxxxxxxxx

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 251143361f31..39417b77bff2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2129,30 +2129,42 @@ static int
execlists_context_deferred_alloc(struct i915_gem_context *ctx,

 void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 {
- struct i915_gem_context *ctx = dev_priv->kernel_context;
  struct intel_engine_cs *engine;
+ struct i915_gem_context *ctx;
+
+ /* Because we emit WA_TAIL_DWORDS there may be a disparity
+ * between our bookkeeping in ce->ring->head and ce->ring->tail and
+ * that stored in context. As we only write new commands from
+ * ce->ring->tail onwards, everything before that is junk. If the GPU
+ * starts reading from its RING_HEAD from the context, it may try to
+ * execute that junk and die.
+ *
+ * So to avoid that we reset the context images upon resume. For
+ * simplicity, we just zero everything out.
+ */
+ list_for_each_entry(ctx, &dev_priv->context_list, link) {
+ for_each_engine(engine, dev_priv) {
+ struct intel_context *ce = &ctx->engine[engine->id];
+ u32 *reg;

- for_each_engine(engine, dev_priv) {
- struct intel_context *ce = &ctx->engine[engine->id];
- void *vaddr;
- uint32_t *reg_state;
-
- if (!ce->state)
- continue;
-
- vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
- if (WARN_ON(IS_ERR(vaddr)))
- continue;
+ if (!ce->state)
+ continue;

- reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+ reg = i915_gem_object_pin_map(ce->state->obj,
+      I915_MAP_WB);
+ if (WARN_ON(IS_ERR(reg)))
+ continue;

- reg_state[CTX_RING_HEAD+1] = 0;
- reg_state[CTX_RING_TAIL+1] = 0;
+ reg += LRC_STATE_PN * PAGE_SIZE / sizeof(*reg);
+ reg[CTX_RING_HEAD+1] = 0;
+ reg[CTX_RING_TAIL+1] = 0;

- ce->state->obj->dirty = true;
- i915_gem_object_unpin_map(ce->state->obj);
+ ce->state->obj->dirty = true;
+ i915_gem_object_unpin_map(ce->state->obj);

- ce->ring->head = 0;
- ce->ring->tail = 0;
+ ce->ring->head = ce->ring->tail = 0;
+ ce->ring->last_retired_head = -1;
+ intel_ring_update_space(ce->ring);
+ }
  }
 }
--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html