On 01/12/2015 12:17, Mika Kuoppala wrote:
If head seems stuck and engine in question is rcs, inspect subunit state transitions before deciding that this really is a hang instead of limited progress. References: https://bugs.freedesktop.org/show_bug.cgi?id=93029 Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Dave Gordon <david.s.gordon@xxxxxxxxx> Cc: Daniel Vetter <daniel@xxxxxxxx> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_irq.c | 49 +++++++++++++++++++++++++++++---- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e88d692..e6ae54f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2913,13 +2913,31 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv) ring->hangcheck.deadlock = 0; } -static enum intel_ring_hangcheck_action -ring_stuck(struct intel_engine_cs *ring, u64 acthd) +static bool subunits_stuck(struct intel_engine_cs *ring) { - struct drm_device *dev = ring->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - u32 tmp; + int i; + u32 instdone[I915_NUM_INSTDONE_REG]; + bool stuck; + + if (ring->id != RCS) + return true; + + i915_get_extra_instdone(ring->dev, instdone); + stuck = true; + for (i = 0; i < I915_NUM_INSTDONE_REG; i++) { + if (instdone[i] != ring->hangcheck.instdone[i]) + stuck = false;
This may not be completely reliable. Tomas Elf in his TDR tests observed that instdone kept changing even when CS is hung and in a stable state.
regards Arun
+ + ring->hangcheck.instdone[i] = instdone[i]; + } + + return stuck; +} + +static enum intel_ring_hangcheck_action +head_stuck(struct intel_engine_cs *ring, u64 acthd) +{ if (acthd != ring->hangcheck.acthd) { if (acthd > ring->hangcheck.max_acthd) { ring->hangcheck.max_acthd = acthd; @@ -2929,6 +2947,24 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd) return HANGCHECK_ACTIVE_LOOP; } + if (!subunits_stuck(ring)) + return HANGCHECK_ACTIVE_LOOP; + + return HANGCHECK_HUNG; +} + +static enum intel_ring_hangcheck_action +ring_stuck(struct intel_engine_cs *ring, u64 acthd) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + enum intel_ring_hangcheck_action ha; + u32 tmp; + + ha = head_stuck(ring, acthd); + if (ha != HANGCHECK_HUNG) + return ha; + if (IS_GEN2(dev)) return HANGCHECK_HUNG; @@ -3064,6 +3100,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work) ring->hangcheck.score--; ring->hangcheck.acthd = ring->hangcheck.max_acthd = 0; + + memset(ring->hangcheck.instdone, 0, + sizeof(ring->hangcheck.instdone)); } ring->hangcheck.seqno = seqno; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 5d1eb20..b8fe92e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -93,6 +93,7 @@ struct intel_ring_hangcheck { int score; enum intel_ring_hangcheck_action action; int deadlock; + u32 instdone[I915_NUM_INSTDONE_REG]; }; struct intel_ringbuffer {
_______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx