If we already wedged, i915_gem_set_wedged() becomes a complicated no-op.

v2: Make sure the double set-wedged is synchronous, a parallel call
should not return before the driver is indeed wedged.

References: https://bugs.freedesktop.org/show_bug.cgi?id=107343
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_gem.c       | 31 ++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_gpu_error.h |  3 ++-
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8b52cb768a67..832c65734c47 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3298,12 +3298,26 @@ static void nop_complete_submit_request(struct i915_request *request)
 	spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
 }
 
+static void wait_for_wedged(struct i915_gpu_error *error)
+{
+	DEFINE_WAIT_BIT(wq_entry, &error->flags, I915_WEDGED);
+
+	__wait_on_bit(&error->reset_queue,
+		      &wq_entry, bit_wait, TASK_UNINTERRUPTIBLE);
+}
+
 void i915_gem_set_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	GEM_TRACE("start\n");
+	if (test_and_set_bit(I915_WEDGE_IN_PROGRESS, &error->flags)) {
+		wait_for_wedged(error);
+		return;
+	}
+	if (test_bit(I915_WEDGED, &error->flags))
+		return;
 
 	if (GEM_SHOW_DEBUG()) {
 		struct drm_printer p = drm_debug_printer(__func__);
@@ -3312,8 +3326,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 	}
 
-	set_bit(I915_WEDGED, &i915->gpu_error.flags);
-	smp_mb__after_atomic();
+	GEM_TRACE("start\n");
 
 	/*
 	 * First, stop submission to hw, but do not yet complete requests by
@@ -3372,17 +3385,25 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 		i915_gem_reset_finish_engine(engine);
 	}
 
+	smp_mb__before_atomic();
+	set_bit(I915_WEDGED, &error->flags);
+	clear_bit(I915_WEDGE_IN_PROGRESS, &error->flags);
+
 	GEM_TRACE("end\n");
 
-	wake_up_all(&i915->gpu_error.reset_queue);
+	wake_up_all(&error->reset_queue);
 }
 
 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct i915_timeline *tl;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
-	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+
+	if (test_bit(I915_WEDGE_IN_PROGRESS, &error->flags))
+		wait_for_wedged(error);
+	if (!test_bit(I915_WEDGED, &error->flags))
 		return true;
 
 	GEM_TRACE("start\n");
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index f893a4e8b783..1a78a8f330f2 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -267,8 +267,9 @@ struct i915_gpu_error {
 #define I915_RESET_BACKOFF	0
 #define I915_RESET_HANDOFF	1
 #define I915_RESET_MODESET	2
+#define I915_RESET_ENGINE	3
 #define I915_WEDGED		(BITS_PER_LONG - 1)
-#define I915_RESET_ENGINE	(I915_WEDGED - I915_NUM_ENGINES)
+#define I915_WEDGE_IN_PROGRESS	(I915_WEDGED - 1)
 
 	/** Number of times an engine has been reset */
 	u32 reset_engine_count[I915_NUM_ENGINES];
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx