If there is a very small loop in a batch, the chances are quite high that we sample the same head value twice in a row, leading the hangcheck score to be incremented with a hung engine status instead of an active loop, which would have been more correct. Try to resample the actual head a few times to detect small loops instead of jumping to conclusions. Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_irq.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c1d1400..7c1168b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2914,12 +2914,8 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv) } static enum intel_ring_hangcheck_action -head_stuck(struct intel_engine_cs *ring, u64 acthd) +head_action(struct intel_engine_cs *ring, u64 acthd) { - struct drm_device *dev = ring->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - u32 head; - if (acthd != ring->hangcheck.acthd) { if (acthd > ring->hangcheck.max_acthd) { ring->hangcheck.max_acthd = acthd; @@ -2929,6 +2925,21 @@ head_stuck(struct intel_engine_cs *ring, u64 acthd) return HANGCHECK_ACTIVE_LOOP; } + return HANGCHECK_HUNG; +} + +static enum intel_ring_hangcheck_action +head_stuck(struct intel_engine_cs *ring, u64 acthd) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + static enum intel_ring_hangcheck_action ha; + u32 head, retries = 5; + + ha = head_action(ring, acthd); + if (ha != HANGCHECK_HUNG) + return ha; + head = I915_READ_HEAD(ring) & HEAD_ADDR; /* Some operations, like pipe flush, can take a long time. 
@@ -2938,6 +2949,17 @@ head_stuck(struct intel_engine_cs *ring, u64 acthd) if (lower_32_bits(acthd) == head) return HANGCHECK_ACTIVE_LOOP; + do { + msleep(20); + + ring->hangcheck.acthd = acthd; + acthd = intel_ring_get_active_head(ring); + + ha = head_action(ring, acthd); + if (ha != HANGCHECK_HUNG) + return ha; + } while (retries--); + return HANGCHECK_HUNG; } -- 2.5.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx