As a precaution against the driver fouling up and missing a hang, leaving the caller in an indefinite wait, manually inspect for a GPU hang if we time out whilst waiting for a seqno. v2: To avoid issues with multiple clients running hangchecks concurrently or in very rapid succession, make sure we only reactivate the hangcheck timer if we find it idle whilst waiting. Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_gem.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0d878c1..dc382eb 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1096,7 +1096,6 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, struct timespec before, now, wait_time={1,0}; unsigned long timeout_jiffies; long end; - bool wait_forever = true; int ret; if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) @@ -1104,10 +1103,8 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, trace_i915_gem_request_wait_begin(ring, seqno); - if (timeout != NULL) { + if (timeout != NULL) wait_time = *timeout; - wait_forever = false; - } timeout_jiffies = timespec_to_jiffies(&wait_time); @@ -1129,6 +1126,12 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, else end = wait_event_timeout(ring->irq_queue, EXIT_COND, timeout_jiffies); + /* Be paranoid and check that we haven't missed a GPU hang */ + if (end == 0 && + i915_enable_hangcheck && + !timer_pending(&dev_priv->gpu_error.hangcheck_timer)) + mod_timer(&dev_priv->gpu_error.hangcheck_timer, + round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); /* We need to check whether any gpu reset happened in between * the caller grabbing the seqno and now ... 
*/ @@ -1140,7 +1143,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); if (ret) end = ret; - } while (end == 0 && wait_forever); + } while (end == 0 && timeout == NULL); getrawmonotonic(&now); -- 1.7.10.4