We still have an odd race with wedging/unwedging as shown by igt/gem_eio that defies expectations. Add some more trace_printks to try and visualize the flow over the precipice. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem.c | 14 ++++++++++++++ drivers/gpu/drm/i915/i915_request.c | 23 +++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 13d4b0e74641..2fbd622bba30 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3193,6 +3193,9 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) static void nop_submit_request(struct i915_request *request) { + GEM_TRACE("%s fence %llx:%d -> -EIO\n", + request->engine->name, + request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); i915_request_submit(request); @@ -3202,6 +3205,9 @@ static void nop_complete_submit_request(struct i915_request *request) { unsigned long flags; + GEM_TRACE("%s fence %llx:%d -> -EIO\n", + request->engine->name, + request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); spin_lock_irqsave(&request->engine->timeline->lock, flags); @@ -3215,6 +3221,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; + GEM_TRACE("start\n"); + if (drm_debug & DRM_UT_DRIVER) { struct drm_printer p = drm_debug_printer(__func__); @@ -3279,6 +3287,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) i915_gem_reset_finish_engine(engine); } + GEM_TRACE("end\n"); + wake_up_all(&i915->gpu_error.reset_queue); } @@ -3291,6 +3301,8 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) return true; + GEM_TRACE("start\n"); + /* * Before unwedging, make sure that all pending operations * are flushed and errored out - we may have requests waiting upon @@ -3341,6 +3353,8 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) intel_engines_reset_default_submission(i915); i915_gem_contexts_lost(i915); + GEM_TRACE("end\n"); + smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ clear_bit(I915_WEDGED, &i915->gpu_error.flags); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 1810fa1b81cb..43c7134a9b93 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -207,11 +207,16 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) if (ret) return ret; + GEM_BUG_ON(i915->gt.active_requests); + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ for_each_engine(engine, i915, id) { struct i915_gem_timeline *timeline; struct intel_timeline *tl = engine->timeline; + GEM_TRACE("%s seqno %d -> %d\n", + engine->name, tl->seqno, seqno); + if (!i915_seqno_passed(seqno, tl->seqno)) { /* Flush any waiters before we reuse the seqno */ intel_engine_disarm_breadcrumbs(engine); @@ -381,6 +386,11 @@ static void i915_request_retire(struct i915_request *request) struct intel_engine_cs *engine = request->engine; struct i915_gem_active *active, *next; + GEM_TRACE("%s(%d) fence %llx:%d, global_seqno %d\n", + engine->name, intel_engine_get_seqno(engine), + request->fence.context, request->fence.seqno, + request->global_seqno); + lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); GEM_BUG_ON(!i915_request_completed(request)); @@ -488,6 +498,11 @@ void __i915_request_submit(struct i915_request *request) struct intel_timeline *timeline; u32 seqno; + GEM_TRACE("%s fence %llx:%d -> global_seqno %d\n", + request->engine->name, + request->fence.context, request->fence.seqno, + engine->timeline->seqno); + GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->timeline->lock); @@ -537,6 +552,11 @@ void __i915_request_unsubmit(struct i915_request *request) struct intel_engine_cs *engine = request->engine; struct intel_timeline *timeline; + GEM_TRACE("%s fence %llx:%d <- global_seqno %d\n", + request->engine->name, + request->fence.context, request->fence.seqno, + request->global_seqno); + GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->timeline->lock); @@ -996,6 +1016,9 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) u32 *cs; int err; + GEM_TRACE("%s fence %llx:%d\n", + engine->name, request->fence.context, request->fence.seqno); + lockdep_assert_held(&request->i915->drm.struct_mutex); trace_i915_request_add(request); -- 2.16.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx