Since capturing the error state requires fiddling around with the GGTT to read arbitrary buffers and is itself run under stop_machine(), it deadlocks the machine (effectively a hard hang) when run in conjunction with Broxton's VTd workaround to serialize GGTT access. v2: Store the ERR_PTR in first_error so that the error can be reported to the user via sysfs. Fixes: 0ef34ad6222a ("drm/i915: Serialize GTT/Aperture accesses on BXT") Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Jon Bloomfield <jon.bloomfield@xxxxxxxxx> Cc: John Harrison <john.C.Harrison@xxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Cc: Daniel Vetter <daniel.vetter@xxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 8 +++++++- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++ drivers/gpu/drm/i915/i915_gpu_error.c | 15 ++++++++++++++- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 594fd14e66c5..1eca4f954050 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3990,6 +3990,7 @@ static inline void i915_gpu_state_put(struct i915_gpu_state *gpu) struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915); void i915_reset_error_state(struct drm_i915_private *i915); +void i915_disable_error_state(struct drm_i915_private *i915, int err); #else @@ -4002,13 +4003,18 @@ static inline void i915_capture_error_state(struct drm_i915_private *dev_priv, static inline struct i915_gpu_state * i915_first_error_state(struct drm_i915_private *i915) { - return NULL; + return ERR_PTR(-ENODEV); } static inline void i915_reset_error_state(struct drm_i915_private *i915) { } +static inline void i915_disable_error_state(struct drm_i915_private *i915, + int err) +{ +} + #endif const char *i915_cache_level_str(struct drm_i915_private *i915, int type); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index f3c35e826321..0264d88b4cff 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3373,6 +3373,9 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.insert_page = bxt_vtd_ggtt_insert_page__BKL; if (ggtt->base.clear_range != nop_clear_range) ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL; + + /* Prevent recursively calling stop_machine() and deadlocks. */ + i915_disable_error_state(dev_priv, -ENODEV); } ggtt->invalidate = gen6_ggtt_invalidate; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 48418fb81066..0b45d28624b7 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -633,6 +633,9 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, return 0; } + if (IS_ERR(error)) + return PTR_ERR(error); + if (*error->error_msg) err_printf(m, "%s\n", error->error_msg); err_printf(m, "Kernel: " UTS_RELEASE "\n"); @@ -1819,6 +1822,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, error = i915_capture_gpu_state(dev_priv); if (!error) { DRM_DEBUG_DRIVER("out of memory, not capturing error state\n"); + i915_disable_error_state(dev_priv, -ENOMEM); return; } @@ -1874,5 +1878,14 @@ void i915_reset_error_state(struct drm_i915_private *i915) i915->gpu_error.first_error = NULL; spin_unlock_irq(&i915->gpu_error.lock); - i915_gpu_state_put(error); + if (!IS_ERR(error)) + i915_gpu_state_put(error); +} + +void i915_disable_error_state(struct drm_i915_private *i915, int err) +{ + spin_lock_irq(&i915->gpu_error.lock); + if (!i915->gpu_error.first_error) + i915->gpu_error.first_error = ERR_PTR(err); + spin_unlock_irq(&i915->gpu_error.lock); } -- 2.15.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx