Re: [PATCH] drm/i915: Inspect subunit states on hangcheck

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> writes:

> If head seems stuck and engine in question is rcs,
> inspect subunit state transitions before deciding that
> this really is a hang instead of limited progress.
>

One thing to add to this description: since we now
always have one extra hangcheck step, even in truly
stuck cases, the hang declaration takes one tick
longer, 7.5s.

If this becomes a problem, we can sample the instdone
state on the first check to converge back to the old behaviour.

One real, but hard to achieve, improvement in determinism
would be to normalize the amount of 'work' each gen does in
a given tick. This way the shader progress would be
more or less equal, and the same amount of shader work would
cause the same hangcheck behaviour regardless of the actual
speed of computation.

-Mika

> References: https://bugs.freedesktop.org/show_bug.cgi?id=93029
> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Dave Gordon <david.s.gordon@xxxxxxxxx>
> Cc: Daniel Vetter <daniel@xxxxxxxx>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_irq.c         | 49 +++++++++++++++++++++++++++++----
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
>  2 files changed, 45 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index e88d692..e6ae54f 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2913,13 +2913,31 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
>  		ring->hangcheck.deadlock = 0;
>  }
>  
> -static enum intel_ring_hangcheck_action
> -ring_stuck(struct intel_engine_cs *ring, u64 acthd)
> +static bool subunits_stuck(struct intel_engine_cs *ring)
>  {
> -	struct drm_device *dev = ring->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	u32 tmp;
> +	int i;
> +	u32 instdone[I915_NUM_INSTDONE_REG];
> +	bool stuck;
> +
> +	if (ring->id != RCS)
> +		return true;
> +
> +	i915_get_extra_instdone(ring->dev, instdone);
>  
> +	stuck = true;
> +	for (i = 0; i < I915_NUM_INSTDONE_REG; i++) {
> +		if (instdone[i] != ring->hangcheck.instdone[i])
> +			stuck = false;
> +
> +		ring->hangcheck.instdone[i] = instdone[i];
> +	}
> +
> +	return stuck;
> +}
> +
> +static enum intel_ring_hangcheck_action
> +head_stuck(struct intel_engine_cs *ring, u64 acthd)
> +{
>  	if (acthd != ring->hangcheck.acthd) {
>  		if (acthd > ring->hangcheck.max_acthd) {
>  			ring->hangcheck.max_acthd = acthd;
> @@ -2929,6 +2947,24 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
>  		return HANGCHECK_ACTIVE_LOOP;
>  	}
>  
> +	if (!subunits_stuck(ring))
> +		return HANGCHECK_ACTIVE_LOOP;
> +
> +	return HANGCHECK_HUNG;
> +}
> +
> +static enum intel_ring_hangcheck_action
> +ring_stuck(struct intel_engine_cs *ring, u64 acthd)
> +{
> +	struct drm_device *dev = ring->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	enum intel_ring_hangcheck_action ha;
> +	u32 tmp;
> +
> +	ha = head_stuck(ring, acthd);
> +	if (ha != HANGCHECK_HUNG)
> +		return ha;
> +
>  	if (IS_GEN2(dev))
>  		return HANGCHECK_HUNG;
>  
> @@ -3064,6 +3100,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>  				ring->hangcheck.score--;
>  
>  			ring->hangcheck.acthd = ring->hangcheck.max_acthd = 0;
> +
> +			memset(ring->hangcheck.instdone, 0,
> +			       sizeof(ring->hangcheck.instdone));
>  		}
>  
>  		ring->hangcheck.seqno = seqno;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 5d1eb20..b8fe92e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -93,6 +93,7 @@ struct intel_ring_hangcheck {
>  	int score;
>  	enum intel_ring_hangcheck_action action;
>  	int deadlock;
> +	u32 instdone[I915_NUM_INSTDONE_REG];
>  };
>  
>  struct intel_ringbuffer {
> -- 
> 2.5.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux