[PATCH] drm/i915: Make hangcheck logging more compact

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When we detect a GPU hang, we emit a loud bang with the reason. But in
the process, we also log each ring that hung as a separate message and
then lose that information in the reason itself. Combine the two so
that an accurate reason for why we triggered the GPU hang is recorded in
the error state, and so that we no longer need to emit the incremental
per-ring hang detection messages.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_irq.c | 55 ++++++++++++++++++++++++++++++++---------
 1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 112a3e60a661..d08538bcfb28 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3190,9 +3190,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 		container_of(work, typeof(*dev_priv),
 			     gpu_error.hangcheck_work.work);
 	struct intel_engine_cs *engine;
-	int i;
-	int busy_count = 0, rings_hung = 0;
 	bool stuck[I915_NUM_ENGINES] = { 0 };
+	int busy_count = 0, hung = 0;
+	int i;
 #define BUSY 1
 #define KICK 5
 #define HUNG 20
@@ -3284,19 +3284,52 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 		engine->hangcheck.seqno = seqno;
 		engine->hangcheck.acthd = acthd;
 		busy_count += busy;
+
+		hung += engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
 	}
 
-	for_each_engine(engine, dev_priv, i) {
-		if (engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG) {
-			DRM_INFO("%s on %s\n",
-				 stuck[i] ? "stuck" : "no progress",
-				 engine->name);
-			rings_hung++;
+	if (hung) {
+		char msg[512];
+		int rings_stall, rings_stuck, len;
+
+		len = rings_stall = rings_stuck = 0;
+		for_each_engine(engine, dev_priv, i) {
+			if (engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG &&
+			    stuck[i]) {
+				if (rings_stuck == 0)
+					len += snprintf(msg + len,
+							sizeof(msg)-len,
+							"Stuck on");
+				len += snprintf(msg + len, sizeof(msg)-len,
+						" %s,", engine->name);
+				rings_stuck++;
+			}
 		}
-	}
+		if (rings_stuck)
+			msg[--len] = '\0';
+
+		for_each_engine(engine, dev_priv, i) {
+			if (engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG &&
+			    !stuck[i]) {
+				if (rings_stall == 0) {
+					if (rings_stuck)
+						len += snprintf(msg + len,
+								sizeof(msg)-len,
+								"; ");
+					len += snprintf(msg + len,
+							sizeof(msg)-len,
+							"No progress on");
+				}
+				len += snprintf(msg + len, sizeof(msg)-len,
+						" %s,", engine->name);
+				rings_stall++;
+			}
+		}
+		if (rings_stall)
+			msg[--len] = '\0';
 
-	if (rings_hung)
-		return i915_handle_error(dev_priv->dev, true, "Ring hung");
+		return i915_handle_error(dev_priv->dev, true, msg);
+	}
 
 	if (busy_count)
 		/* Reset timer case chip hangs without another request
-- 
2.1.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux