Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> writes: > If head seems stuck and engine in question is rcs, > inspect subunit state transitions before deciding that > this really is a hang instead of limited progress. > One thing to add to this description is that now that we always have one extra hangcheck step even in true stuck cases, this makes the hang declaration one tick longer, 7.5s. If this becomes a problem, we can sample the instdone state on the first check to converge back to the old behaviour. One real, but hard to achieve, improvement in determinism would be to normalize the amount of 'work' each gen does in a given tick. This way the shader progressions would be more or less equal and the same amount of shader work would cause the same hangcheck behaviour regardless of the actual speed of computation. -Mika > References: https://bugs.freedesktop.org/show_bug.cgi?id=93029 > Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Dave Gordon <david.s.gordon@xxxxxxxxx> > Cc: Daniel Vetter <daniel@xxxxxxxx> > Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_irq.c | 49 +++++++++++++++++++++++++++++---- > drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + > 2 files changed, 45 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index e88d692..e6ae54f 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -2913,13 +2913,31 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv) > ring->hangcheck.deadlock = 0; > } > > -static enum intel_ring_hangcheck_action > -ring_stuck(struct intel_engine_cs *ring, u64 acthd) > +static bool subunits_stuck(struct intel_engine_cs *ring) > { > - struct drm_device *dev = ring->dev; > - struct drm_i915_private *dev_priv = dev->dev_private; > - u32 tmp; > + int i; > + u32 instdone[I915_NUM_INSTDONE_REG]; > + bool stuck; > + > + if (ring->id != RCS) > + return true; > + > + 
i915_get_extra_instdone(ring->dev, instdone); > > + stuck = true; > + for (i = 0; i < I915_NUM_INSTDONE_REG; i++) { > + if (instdone[i] != ring->hangcheck.instdone[i]) > + stuck = false; > + > + ring->hangcheck.instdone[i] = instdone[i]; > + } > + > + return stuck; > +} > + > +static enum intel_ring_hangcheck_action > +head_stuck(struct intel_engine_cs *ring, u64 acthd) > +{ > if (acthd != ring->hangcheck.acthd) { > if (acthd > ring->hangcheck.max_acthd) { > ring->hangcheck.max_acthd = acthd; > @@ -2929,6 +2947,24 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd) > return HANGCHECK_ACTIVE_LOOP; > } > > + if (!subunits_stuck(ring)) > + return HANGCHECK_ACTIVE_LOOP; > + > + return HANGCHECK_HUNG; > +} > + > +static enum intel_ring_hangcheck_action > +ring_stuck(struct intel_engine_cs *ring, u64 acthd) > +{ > + struct drm_device *dev = ring->dev; > + struct drm_i915_private *dev_priv = dev->dev_private; > + enum intel_ring_hangcheck_action ha; > + u32 tmp; > + > + ha = head_stuck(ring, acthd); > + if (ha != HANGCHECK_HUNG) > + return ha; > + > if (IS_GEN2(dev)) > return HANGCHECK_HUNG; > > @@ -3064,6 +3100,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work) > ring->hangcheck.score--; > > ring->hangcheck.acthd = ring->hangcheck.max_acthd = 0; > + > + memset(ring->hangcheck.instdone, 0, > + sizeof(ring->hangcheck.instdone)); > } > > ring->hangcheck.seqno = seqno; > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 5d1eb20..b8fe92e 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -93,6 +93,7 @@ struct intel_ring_hangcheck { > int score; > enum intel_ring_hangcheck_action action; > int deadlock; > + u32 instdone[I915_NUM_INSTDONE_REG]; > }; > > struct intel_ringbuffer { > -- > 2.5.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > 
http://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx