If we receive the error interrupt before the CS interrupt, we may find
ourselves without an active request to reset, and so skip the GPU reset
entirely -- all because the attempt to reset came too early.

To close that window, use the most recent result from process_csb() when
there is one, and otherwise peek at the next (pending) submission.

GEM_BUG_ON(EXECLIST_STATUS_HI() != upper_32_bits(ce->lrc_desc)) ?

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3d5f3f7677bb..53364c0ca487 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2770,6 +2770,16 @@ static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
 	return NULL;
 }
 
+static struct i915_request *active_context(struct intel_engine_execlists *el)
+{
+	/*
+	 * Use the most recent result from process_csb(), but just in case
+	 * we trigger an error (via interrupt) before the first CS event has
+	 * been written, peek at the next submission.
+	 */
+	return *el->active ?: el->pending[0];
+}
+
 static bool execlists_capture(struct intel_engine_cs *engine)
 {
 	struct execlists_capture *cap;
@@ -2787,7 +2797,7 @@ static bool execlists_capture(struct intel_engine_cs *engine)
 		return true;
 
 	spin_lock_irq(&engine->active.lock);
-	cap->rq = execlists_active(&engine->execlists);
+	cap->rq = active_context(&engine->execlists);
 	if (cap->rq) {
 		cap->rq = active_request(cap->rq->context->timeline, cap->rq);
 		cap->rq = i915_request_get_rcu(cap->rq);
@@ -3724,7 +3734,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 * its request, it was still running at the time of the
 	 * reset and will have been clobbered.
 	 */
-	rq = execlists_active(execlists);
+	rq = active_context(execlists);
 	if (!rq)
 		goto unwind;
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx