When we wedge the device, we override engine->submit_request with a nop to ensure that all in-flight requests are marked in error. However, igt would like to unwedge the device to test -EIO handling. This requires us to flush those in-flight requests and restore the original engine->submit_request. v2: Use a vfunc to unify enabling request submission to engines Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests") Testcase: igt/gem_eio Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 45 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 10 +++++++ drivers/gpu/drm/i915/intel_lrc.c | 15 ++++------ drivers/gpu/drm/i915/intel_lrc.h | 1 - drivers/gpu/drm/i915/intel_ringbuffer.c | 15 ++++++++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +++ 9 files changed, 80 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index b1e9027a4f80..576b03b0048c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1825,7 +1825,7 @@ void i915_reset(struct drm_i915_private *dev_priv) return; /* Clear any previous failed attempts at recovery. Time to try again. */ - __clear_bit(I915_WEDGED, &error->flags); + i915_gem_unset_wedged(dev_priv); error->reset_count++; pr_notice("drm/i915: Resetting chip after gpu hang\n"); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eef096566b81..71f07f45d5fe 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3409,6 +3409,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); void i915_gem_reset(struct drm_i915_private *dev_priv); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); +void i915_gem_unset_wedged(struct drm_i915_private *dev_priv); void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 202bb850f260..aae491076390 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3000,6 +3000,51 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv) mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); } +void i915_gem_unset_wedged(struct drm_i915_private *i915) +{ + struct i915_gem_timeline *tl; + int i; + + lockdep_assert_held(&i915->drm.struct_mutex); + if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) + return; + + /* Before unwedging, make sure that all pending operations + * are flushed and errored out. No more can be submitted until + * we reset the wedged bit. + */ + list_for_each_entry(tl, &i915->gt.timelines, link) { + for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { + struct drm_i915_gem_request *rq; + + rq = i915_gem_active_peek(&tl->engine[i].last_request, + &i915->drm.struct_mutex); + if (!rq) + continue; + + /* We can't use our normal waiter as we want to + * avoid recursively trying to handle the current + * reset. + */ + dma_fence_default_wait(&rq->fence, false, + MAX_SCHEDULE_TIMEOUT); + } + } + + /* Undo nop_submit_request. We prevent all new i915 requests from + * being queued (by disallowing execbuf whilst wedged) so having + * waited for all active requests above, we know the system is idle + * and do not have to worry about a thread being inside + * engine->submit_request() as we swap over. So unlike installing + * the nop_submit_request on reset, we can do this from normal + * context and do not require stop_machine(). + */ + intel_engines_enable_submission(i915); + + smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ + clear_bit(I915_WEDGED, &i915->gpu_error.flags); +} + static void i915_gem_retire_work_handler(struct work_struct *work) { diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 41f2dd87b413..28289627a4ca 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1003,7 +1003,7 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv) return; /* Revert back to manual ELSP submission */ - intel_execlists_enable_submission(dev_priv); + intel_engines_enable_submission(dev_priv); } void i915_guc_submission_fini(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 73fe718516a5..5663ebab851f 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -191,6 +191,7 @@ int intel_engines_init(struct drm_i915_private *dev_priv) goto cleanup; } + engine->enable_submission(engine); mask |= ENGINE_MASK(id); } @@ -1115,6 +1116,15 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv) return true; } +void intel_engines_enable_submission(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + engine->enable_submission(engine); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #endif diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 89f38e7def9f..f79df7a51e60 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1560,15 +1560,10 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) kfree(engine); } -void intel_execlists_enable_submission(struct drm_i915_private *dev_priv) +static void logical_ring_enable_submission(struct intel_engine_cs *engine) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, dev_priv, id) { - engine->submit_request = execlists_submit_request; - engine->schedule = execlists_schedule; - } + engine->submit_request = execlists_submit_request; + engine->schedule = execlists_schedule; } static void @@ -1586,8 +1581,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->emit_flush = gen8_emit_flush; engine->emit_breadcrumb = gen8_emit_breadcrumb; engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz; - engine->submit_request = execlists_submit_request; - engine->schedule = execlists_schedule; + + engine->enable_submission = logical_ring_enable_submission; engine->irq_enable = gen8_logical_ring_enable_irq; engine->irq_disable = gen8_logical_ring_disable_irq; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 5fc07761caff..e8015e7bf4e9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -87,6 +87,5 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, /* Execlists */ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists); -void intel_execlists_enable_submission(struct drm_i915_private *dev_priv); #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4a864f8c9387..5b141f6639b6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2050,6 +2050,16 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv, } } +static void i9xx_enable_submission(struct intel_engine_cs *engine) +{ + engine->submit_request = i9xx_submit_request; +} + +static void gen6_bsd_enable_submission(struct intel_engine_cs *engine) +{ + engine->submit_request = gen6_bsd_submit_request; +} + static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { @@ -2080,7 +2090,8 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->emit_breadcrumb_sz++; } } - engine->submit_request = i9xx_submit_request; + + engine->enable_submission = i9xx_enable_submission; if (INTEL_GEN(dev_priv) >= 8) engine->emit_bb_start = gen8_emit_bb_start; @@ -2165,7 +2176,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) >= 6) { /* gen6 bsd needs a special wa for tail updates */ if (IS_GEN6(dev_priv)) - engine->submit_request = gen6_bsd_submit_request; + engine->enable_submission = gen6_bsd_enable_submission; engine->emit_flush = gen6_bsd_ring_flush; if (INTEL_GEN(dev_priv) < 8) engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 0ef491df5b4e..30d9820d978c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -273,6 +273,8 @@ struct intel_engine_cs { void (*reset_hw)(struct intel_engine_cs *engine, struct drm_i915_gem_request *req); + void (*enable_submission)(struct intel_engine_cs *engine); + int (*context_pin)(struct intel_engine_cs *engine, struct i915_gem_context *ctx); void (*context_unpin)(struct intel_engine_cs *engine, @@ -669,4 +671,6 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); +void intel_engines_enable_submission(struct drm_i915_private *i915); + #endif /* _INTEL_RINGBUFFER_H_ */ -- 2.11.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx