If a context has recently submitted a faulty batchbuffer that was found guilty of a GPU hang and keeps submitting more faulty batchbuffers, ban it permanently. Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com> --- drivers/gpu/drm/i915/i915_drv.c | 23 ++++++++++++++++++++++- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 +++++++++++ 4 files changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1ebed96..69c9856 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -817,6 +817,8 @@ int intel_gpu_reset(struct drm_device *dev) int i915_reset(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; + struct ctx_reset_state *gstate; + bool do_wedge = true; int ret; if (!i915_try_reset) @@ -824,10 +826,29 @@ int i915_reset(struct drm_device *dev) mutex_lock(&dev->struct_mutex); + /* i915_gem_reset will set this if it finds guilty context */ + dev_priv->gpu_error.guilty_state = NULL; + i915_gem_reset(dev); + gstate = dev_priv->gpu_error.guilty_state; + + if (gstate) { + if (gstate->guilty == 1) { + do_wedge = false; + } else if (!gstate->banned && + get_seconds() - gstate->last_guilty_reset < 5) { + gstate->banned = true; + do_wedge = false; + } + + gstate->last_guilty_reset = get_seconds(); + } + + dev_priv->gpu_error.guilty_state = NULL; + ret = -ENODEV; - if (get_seconds() - dev_priv->gpu_error.last_reset < 5) + if (do_wedge && get_seconds() - dev_priv->gpu_error.last_reset < 5) DRM_ERROR("GPU hanging too fast, declaring wedged!\n"); else ret = intel_gpu_reset(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1a4cba0..3e11acf 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -818,6 +818,7 @@ struct i915_gpu_error { struct work_struct work; unsigned long last_reset; + struct ctx_reset_state *guilty_state; unsigned 
long guilty_cnt; /** diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6d3916a..de7403f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2175,6 +2175,7 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring, if (guilty) { rs->guilty++; dev_priv->gpu_error.guilty_cnt++; + dev_priv->gpu_error.guilty_state = rs; } else { rs->innocent++; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 2f23013..97d3887 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -841,6 +841,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_clip_rect *cliprects = NULL; struct intel_ring_buffer *ring; struct i915_hw_context *ctx; + struct ctx_reset_state *rs; u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 exec_start, exec_len; u32 mask, flags; @@ -1023,6 +1024,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) goto err; + ret = i915_gem_context_get_reset_state(&dev_priv->ring[RCS], + file, ctx_id, &rs); + if (ret) + goto err; + + if (rs->banned) { + ret = -EIO; + goto err; + } + ctx = i915_switch_context(ring, file, ctx_id); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); -- 1.7.9.5