At a few points in our uABI, we check to see if the driver is wedged and report -EIO back to the user in that case. However, as we perform the check and reset asynchronously, we may instead see the temporary wedging used to cancel inflight rendering to avoid a deadlock during reset. If we suspect this is the case, that is we see a wedged driver and reset in progress, then do a round-trip back to userspace with an -EAGAIN until the reset attempt is resolved. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109580 Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem.c | 5 +++-- drivers/gpu/drm/i915/i915_request.c | 5 +++-- drivers/gpu/drm/i915/i915_reset.c | 14 ++++++++++++++ drivers/gpu/drm/i915/i915_reset.h | 2 ++ 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b421bc7a2e26..1a730a005d17 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3806,8 +3806,9 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) long ret; /* ABI: return -EIO if already wedged */ - if (i915_terminally_wedged(&dev_priv->gpu_error)) - return -EIO; + ret = i915_reset_backoff(dev_priv); + if (ret) + return ret; spin_lock(&file_priv->mm.lock); list_for_each_entry(request, &file_priv->mm.request_list, client_link) { diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 5ab4e1c01618..a0d91b24b12b 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -561,8 +561,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged. */ - if (i915_terminally_wedged(&i915->gpu_error)) - return ERR_PTR(-EIO); + ret = i915_reset_backoff(i915); + if (ret) + return ERR_PTR(ret); /* * Pinning the contexts may generate requests in order to acquire diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 4df4c674223d..fa3eb2a840ff 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -1334,6 +1334,20 @@ __releases(&i915->gpu_error.reset_backoff_srcu) srcu_read_unlock(&error->reset_backoff_srcu, tag); } +int i915_reset_backoff(struct drm_i915_private *i915) +{ + struct i915_gpu_error *error = &i915->gpu_error; + unsigned long flags = READ_ONCE(error->flags); + + if (likely(!test_bit(I915_WEDGED, &flags))) + return 0; + + if (test_bit(I915_RESET_BACKOFF, &flags)) + return -EAGAIN; /* reset still in progress; may recover */ + + return -EIO; +} + bool i915_reset_flush(struct drm_i915_private *i915) { int err; diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h index 893c5d1c2eb8..2aeaad6114e9 100644 --- a/drivers/gpu/drm/i915/i915_reset.h +++ b/drivers/gpu/drm/i915/i915_reset.h @@ -36,6 +36,8 @@ bool i915_reset_flush(struct drm_i915_private *i915); int __must_check i915_reset_trylock(struct drm_i915_private *i915); void i915_reset_unlock(struct drm_i915_private *i915, int tag); +int i915_reset_backoff(struct drm_i915_private *i915); + bool intel_has_gpu_reset(struct drm_i915_private *i915); bool intel_has_reset_engine(struct drm_i915_private *i915); -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx