Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > Whilst investigating some mysterious failures with hangcheck not running > during gem_busy/basic-hang-default, the question is why did we decide to > cancel the retire_work (which queues the hangcheck)? That decision is > based around GT activity, so include that information in the debug > report. > > v2: Include the GT awake status in the error state > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> While testing this, I noticed that we might not be so interested in the instdone bits if the seqno is moving, but that has nothing to do with this patch. Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 17 +++++++++++++---- > drivers/gpu/drm/i915/i915_drv.h | 1 + > drivers/gpu/drm/i915/i915_gpu_error.c | 3 +++ > 3 files changed, 17 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 9b697fd03721..ee4bf6f71cab 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -1341,14 +1341,17 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) > } else > seq_printf(m, "Hangcheck inactive\n"); > > + seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake)); > + > for_each_engine(engine, dev_priv, id) { > struct intel_breadcrumbs *b = &engine->breadcrumbs; > struct rb_node *rb; > > seq_printf(m, "%s:\n", engine->name); > - seq_printf(m, "\tseqno = %x [current %x, last %x]\n", > + seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n", > engine->hangcheck.seqno, seqno[id], > - intel_engine_last_submit(engine)); > + intel_engine_last_submit(engine), > + engine->timeline->inflight_seqnos); > seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? 
%s\n", > yesno(intel_engine_has_waiter(engine)), > yesno(test_bit(engine->id, > @@ -3240,6 +3243,11 @@ static int i915_engine_info(struct seq_file *m, void *unused) > > intel_runtime_pm_get(dev_priv); > > + seq_printf(m, "GT awake? %s\n", > + yesno(dev_priv->gt.awake)); > + seq_printf(m, "Global active requests: %d\n", > + dev_priv->gt.active_requests); > + > for_each_engine(engine, dev_priv, id) { > struct intel_breadcrumbs *b = &engine->breadcrumbs; > struct drm_i915_gem_request *rq; > @@ -3247,11 +3255,12 @@ static int i915_engine_info(struct seq_file *m, void *unused) > u64 addr; > > seq_printf(m, "%s\n", engine->name); > - seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", > + seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", > intel_engine_get_seqno(engine), > intel_engine_last_submit(engine), > engine->hangcheck.seqno, > - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); > + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), > + engine->timeline->inflight_seqnos); > > rcu_read_lock(); > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 7a2f2e4468d6..66f19924828a 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -936,6 +936,7 @@ struct i915_gpu_state { > > char error_msg[128]; > bool simulated; > + bool awake; > int iommu; > u32 reset_count; > u32 suspend_count; > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c > index 76855e1d8795..b4ae1464e0ab 100644 > --- a/drivers/gpu/drm/i915/i915_gpu_error.c > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c > @@ -632,6 +632,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, > CSR_VERSION_MINOR(csr->version)); > } > > + err_printf(m, "GT awake: %s\n", yesno(error->awake)); > err_printf(m, "EIR: 0x%08x\n", error->eir); > err_printf(m, "IER: 0x%08x\n", error->ier); > for (i = 0; i < error->ngtier; i++) > @@ 
-1653,6 +1654,8 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, > static void i915_capture_gen_state(struct drm_i915_private *dev_priv, > struct i915_gpu_state *error) > { > + error->awake = dev_priv->gt.awake; > + > error->iommu = -1; > #ifdef CONFIG_INTEL_IOMMU > error->iommu = intel_iommu_gfx_mapped; > -- > 2.11.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx