Re: [PATCH] drm/i915: Inspect subunit states on hangcheck

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 01/12/2015 12:17, Mika Kuoppala wrote:
If the head seems stuck and the engine in question is RCS,
inspect subunit state transitions before deciding that
this really is a hang rather than limited progress.

References: https://bugs.freedesktop.org/show_bug.cgi?id=93029
Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Dave Gordon <david.s.gordon@xxxxxxxxx>
Cc: Daniel Vetter <daniel@xxxxxxxx>
Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_irq.c         | 49 +++++++++++++++++++++++++++++----
  drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
  2 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index e88d692..e6ae54f 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2913,13 +2913,31 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
  		ring->hangcheck.deadlock = 0;
  }

-static enum intel_ring_hangcheck_action
-ring_stuck(struct intel_engine_cs *ring, u64 acthd)
+static bool subunits_stuck(struct intel_engine_cs *ring)
  {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 tmp;
+	int i;
+	u32 instdone[I915_NUM_INSTDONE_REG];
+	bool stuck;
+
+	if (ring->id != RCS)
+		return true;
+
+	i915_get_extra_instdone(ring->dev, instdone);

+	stuck = true;
+	for (i = 0; i < I915_NUM_INSTDONE_REG; i++) {
+		if (instdone[i] != ring->hangcheck.instdone[i])
+			stuck = false;

This may not be completely reliable: in his TDR tests, Tomas Elf observed that instdone kept changing even when the CS was hung and in a stable state.

regards
Arun

+
+		ring->hangcheck.instdone[i] = instdone[i];
+	}
+
+	return stuck;
+}
+
+static enum intel_ring_hangcheck_action
+head_stuck(struct intel_engine_cs *ring, u64 acthd)
+{
  	if (acthd != ring->hangcheck.acthd) {
  		if (acthd > ring->hangcheck.max_acthd) {
  			ring->hangcheck.max_acthd = acthd;
@@ -2929,6 +2947,24 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
  		return HANGCHECK_ACTIVE_LOOP;
  	}

+	if (!subunits_stuck(ring))
+		return HANGCHECK_ACTIVE_LOOP;
+
+	return HANGCHECK_HUNG;
+}
+
+static enum intel_ring_hangcheck_action
+ring_stuck(struct intel_engine_cs *ring, u64 acthd)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	enum intel_ring_hangcheck_action ha;
+	u32 tmp;
+
+	ha = head_stuck(ring, acthd);
+	if (ha != HANGCHECK_HUNG)
+		return ha;
+
  	if (IS_GEN2(dev))
  		return HANGCHECK_HUNG;

@@ -3064,6 +3100,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
  				ring->hangcheck.score--;

  			ring->hangcheck.acthd = ring->hangcheck.max_acthd = 0;
+
+			memset(ring->hangcheck.instdone, 0,
+			       sizeof(ring->hangcheck.instdone));
  		}

  		ring->hangcheck.seqno = seqno;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 5d1eb20..b8fe92e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -93,6 +93,7 @@ struct intel_ring_hangcheck {
  	int score;
  	enum intel_ring_hangcheck_action action;
  	int deadlock;
+	u32 instdone[I915_NUM_INSTDONE_REG];
  };

  struct intel_ringbuffer {


_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux