Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > Remove some redundant kernel messages as we deduce a hung GPU and > capture the error state. > > v2: Fix "hang" vs "no progress" message whilst I was there > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_irq.c | 41 ++++++++++++++++++++++++++--------------- > 1 file changed, 26 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index 34e25fc2b90a..860235d1e0bf 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -3083,9 +3083,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) > container_of(work, typeof(*dev_priv), > gpu_error.hangcheck_work.work); > struct intel_engine_cs *engine; > - enum intel_engine_id id; > - int busy_count = 0, rings_hung = 0; > - bool stuck[I915_NUM_ENGINES] = { 0 }; > + unsigned hung = 0, stuck = 0; > + int busy_count = 0; > #define BUSY 1 > #define KICK 5 > #define HUNG 20 > @@ -3103,7 +3102,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) > */ > intel_uncore_arm_unclaimed_mmio_detection(dev_priv); > > - for_each_engine_id(engine, dev_priv, id) { > + for_each_engine(engine, dev_priv) { > bool busy = intel_engine_has_waiter(engine); > u64 acthd; > u32 seqno; > @@ -3166,10 +3165,15 @@ static void i915_hangcheck_elapsed(struct work_struct *work) > break; > case HANGCHECK_HUNG: > engine->hangcheck.score += HUNG; > - stuck[id] = true; > break; > } > } > + > + if (engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG) { > + hung |= intel_engine_flag(engine); > + if (engine->hangcheck.action != HANGCHECK_HUNG) > + stuck |= intel_engine_flag(engine); > + } > } else { > engine->hangcheck.action = HANGCHECK_ACTIVE; > > @@ -3194,17 +3198,24 @@ static void i915_hangcheck_elapsed(struct work_struct *work) > busy_count += busy; > } > > - for_each_engine_id(engine, dev_priv, id) { > - if (engine->hangcheck.score >= 
HANGCHECK_SCORE_RING_HUNG) { > - DRM_INFO("%s on %s\n", > - stuck[id] ? "stuck" : "no progress", > - engine->name); > - rings_hung |= intel_engine_flag(engine); > - } > - } > + if (hung) { > + char msg[80]; > + int len; > > - if (rings_hung) > - i915_handle_error(dev_priv, rings_hung, "Engine(s) hung"); > + /* If some rings hung but others were still busy, only > + * blame the hanging rings in the synopsis. > + */ > + if (stuck != hung) > + hung &= ~stuck; > + len = snprintf(msg, sizeof(msg), > + "%s on ", stuck == hung ? "No progress" : "Hang"); > + for_each_engine_masked(engine, dev_priv, hung) > + len += snprintf(msg + len, sizeof(msg) - len, > + "%s, ", engine->name); > + msg[len-2] = '\0'; > + msg[len-1]? snprintf() returns the number of bytes that would have been written had the buffer been large enough, so len can grow past sizeof(msg) and there is a possibility of overwriting the stack here. It is safer to use scnprintf(), which returns the number of bytes actually written. -Mika > + return i915_handle_error(dev_priv, hung, msg); > + } > > /* Reset timer in case GPU hangs without another request being added */ > if (busy_count) > -- > 2.8.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx