Re: [PATCH v2] drm/i915: Include GT/seqno activity in engine/hangcheck debugfs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes:

> Whilst investigating some mysterious failures with hangcheck not running
> during gem_busy/basic-hang-default, the question arose: why did we decide
> to cancel the retire_work (which queues the hangcheck)? That decision is
> based around GT activity, so include that information in the debug
> report.
>
> v2: Include the GT awake status in the error state
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>

While testing this, I noticed that we might not be so
interested in the instdone bits if the seqno is moving.
But that has nothing to do with this patch.

Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c   | 17 +++++++++++++----
>  drivers/gpu/drm/i915/i915_drv.h       |  1 +
>  drivers/gpu/drm/i915/i915_gpu_error.c |  3 +++
>  3 files changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 9b697fd03721..ee4bf6f71cab 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1341,14 +1341,17 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  	} else
>  		seq_printf(m, "Hangcheck inactive\n");
>  
> +	seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake));
> +
>  	for_each_engine(engine, dev_priv, id) {
>  		struct intel_breadcrumbs *b = &engine->breadcrumbs;
>  		struct rb_node *rb;
>  
>  		seq_printf(m, "%s:\n", engine->name);
> -		seq_printf(m, "\tseqno = %x [current %x, last %x]\n",
> +		seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n",
>  			   engine->hangcheck.seqno, seqno[id],
> -			   intel_engine_last_submit(engine));
> +			   intel_engine_last_submit(engine),
> +			   engine->timeline->inflight_seqnos);
>  		seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s\n",
>  			   yesno(intel_engine_has_waiter(engine)),
>  			   yesno(test_bit(engine->id,
> @@ -3240,6 +3243,11 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>  
>  	intel_runtime_pm_get(dev_priv);
>  
> +	seq_printf(m, "GT awake? %s\n",
> +		   yesno(dev_priv->gt.awake));
> +	seq_printf(m, "Global active requests: %d\n",
> +		   dev_priv->gt.active_requests);
> +
>  	for_each_engine(engine, dev_priv, id) {
>  		struct intel_breadcrumbs *b = &engine->breadcrumbs;
>  		struct drm_i915_gem_request *rq;
> @@ -3247,11 +3255,12 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>  		u64 addr;
>  
>  		seq_printf(m, "%s\n", engine->name);
> -		seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n",
> +		seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n",
>  			   intel_engine_get_seqno(engine),
>  			   intel_engine_last_submit(engine),
>  			   engine->hangcheck.seqno,
> -			   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
> +			   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp),
> +			   engine->timeline->inflight_seqnos);
>  
>  		rcu_read_lock();
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7a2f2e4468d6..66f19924828a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -936,6 +936,7 @@ struct i915_gpu_state {
>  
>  	char error_msg[128];
>  	bool simulated;
> +	bool awake;
>  	int iommu;
>  	u32 reset_count;
>  	u32 suspend_count;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 76855e1d8795..b4ae1464e0ab 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -632,6 +632,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>  			   CSR_VERSION_MINOR(csr->version));
>  	}
>  
> +	err_printf(m, "GT awake: %s\n", yesno(error->awake));
>  	err_printf(m, "EIR: 0x%08x\n", error->eir);
>  	err_printf(m, "IER: 0x%08x\n", error->ier);
>  	for (i = 0; i < error->ngtier; i++)
> @@ -1653,6 +1654,8 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
>  static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
>  				   struct i915_gpu_state *error)
>  {
> +	error->awake = dev_priv->gt.awake;
> +
>  	error->iommu = -1;
>  #ifdef CONFIG_INTEL_IOMMU
>  	error->iommu = intel_iommu_gfx_mapped;
> -- 
> 2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux