When we detect a GPU hang, we emit a loud bang with the reason. But in the process, we also log each ring that hangs and then lose that information in the reason. Combine the two so that an accurate reason why we triggered the GPU hang is logged in the error state and so that we no longer need to emit the incremental per-ring hang detection messages. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_irq.c | 55 ++++++++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 112a3e60a661..d08538bcfb28 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3190,9 +3190,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work) container_of(work, typeof(*dev_priv), gpu_error.hangcheck_work.work); struct intel_engine_cs *engine; - int i; - int busy_count = 0, rings_hung = 0; bool stuck[I915_NUM_ENGINES] = { 0 }; + int busy_count = 0, hung = 0; + int i; #define BUSY 1 #define KICK 5 #define HUNG 20 @@ -3284,19 +3284,52 @@ static void i915_hangcheck_elapsed(struct work_struct *work) engine->hangcheck.seqno = seqno; engine->hangcheck.acthd = acthd; busy_count += busy; + + hung += engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; } - for_each_engine(engine, dev_priv, i) { - if (engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG) { - DRM_INFO("%s on %s\n", - stuck[i] ? 
"stuck" : "no progress", - engine->name); - rings_hung++; + if (hung) { + char msg[512]; + int rings_stall, rings_stuck, len; + + len = rings_stall = rings_stuck = 0; + for_each_engine(engine, dev_priv, i) { + if (engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG && + stuck[i]) { + if (rings_stuck == 0) + len += snprintf(msg + len, + sizeof(msg)-len, + "Stuck on"); + len += snprintf(msg + len, sizeof(msg)-len, + " %s,", engine->name); + rings_stuck++; + } } - } + if (rings_stuck) + msg[--len] = '\0'; + + for_each_engine(engine, dev_priv, i) { + if (engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG && + !stuck[i]) { + if (rings_stall == 0) { + if (rings_stuck) + len += snprintf(msg + len, + sizeof(msg)-len, + "; "); + len += snprintf(msg + len, + sizeof(msg)-len, + "No progress on"); + } + len += snprintf(msg + len, sizeof(msg)-len, + " %s,", engine->name); + rings_stall++; + } + } + if (rings_stall) + msg[--len] = '\0'; - if (rings_hung) - return i915_handle_error(dev_priv->dev, true, "Ring hung"); + return i915_handle_error(dev_priv->dev, true, msg); + } if (busy_count) /* Reset timer case chip hangs without another request -- 2.1.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx