We can forgo queuing the hangcheck at the start of every request and instead defer it until we wait upon a request. This reduces the overhead of every request, but may increase the latency of detecting a hang. However, if nothing ever waits upon a hang, did it ever hang? It also improves the robustness of the wait-request by ensuring that the hangchecker is indeed running before we sleep indefinitely (and thereby ensuring that we never actually sleep forever waiting for a dead GPU). v2: Also queue the hangcheck from retire work in case the GPU becomes stuck when no one is watching. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 13 ++++++++----- drivers/gpu/drm/i915/i915_irq.c | 9 ++++----- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bbdb056d2a8e..d9d411919779 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2710,7 +2710,7 @@ void intel_hpd_cancel_work(struct drm_i915_private *dev_priv); bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port); /* i915_irq.c */ -void i915_queue_hangcheck(struct drm_device *dev); +void i915_queue_hangcheck(struct drm_i915_private *dev_priv); __printf(3, 4) void i915_handle_error(struct drm_device *dev, bool wedged, const char *fmt, ...); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f570990f03e0..b4da8b354a3b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1306,6 +1306,9 @@ int __i915_wait_request(struct drm_i915_gem_request *req, break; } + /* Ensure that even if the GPU hangs, we get woken up. 
*/ + i915_queue_hangcheck(dev_priv); + timer.function = NULL; if (timeout || missed_irq(dev_priv, ring)) { unsigned long expire; @@ -2592,8 +2595,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, trace_i915_gem_request_add(request); - i915_queue_hangcheck(ring->dev); - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, round_jiffies_up_relative(HZ)); @@ -2947,8 +2948,8 @@ i915_gem_retire_requests(struct drm_device *dev) if (idle) mod_delayed_work(dev_priv->wq, - &dev_priv->mm.idle_work, - msecs_to_jiffies(100)); + &dev_priv->mm.idle_work, + msecs_to_jiffies(100)); return idle; } @@ -2967,9 +2968,11 @@ i915_gem_retire_work_handler(struct work_struct *work) idle = i915_gem_retire_requests(dev); mutex_unlock(&dev->struct_mutex); } - if (!idle) + if (!idle) { + i915_queue_hangcheck(dev_priv); queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, round_jiffies_up_relative(HZ)); + } } static void diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 15973e917566..94f5f4e99446 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3165,18 +3165,17 @@ static void i915_hangcheck_elapsed(struct work_struct *work) goto out; } + /* Reset timer in case GPU hangs without another request being added */ if (busy_count) - /* Reset timer case chip hangs without another request - * being added */ - i915_queue_hangcheck(dev); + i915_queue_hangcheck(dev_priv); out: ENABLE_RPM_WAKEREF_ASSERTS(dev_priv); } -void i915_queue_hangcheck(struct drm_device *dev) +void i915_queue_hangcheck(struct drm_i915_private *dev_priv) { - struct i915_gpu_error *e = &to_i915(dev)->gpu_error; + struct i915_gpu_error *e = &dev_priv->gpu_error; if (!i915.enable_hangcheck) return; -- 2.7.0.rc3 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx