Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> writes: > Instead of going through all the requests to find a batch that > hung the machine, use the hangcheck score and the fact that > the first non-completed request on a hung ring is, with great > probability, the guilty one. This also ensures that we get one > guilty batch per hang instead of possibly more (for each ring). > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73652 The "Suggested-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>" tag is missing here. > Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_gem.c | 19 ++++++++++--------- > drivers/gpu/drm/i915/i915_irq.c | 3 +-- > drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ > 3 files changed, 13 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index d270351..27a97c3 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2322,20 +2322,17 @@ static bool i915_context_is_banned(const struct i915_ctx_hang_stats *hs) > > static void i915_set_reset_status(struct intel_ring_buffer *ring, > struct drm_i915_gem_request *request, > - u32 acthd) > + u32 acthd, const bool guilty) > { > struct i915_ctx_hang_stats *hs = NULL; > - bool inside, guilty; > + bool inside; > unsigned long offset = 0; > > - /* Innocent until proven guilty */ > - guilty = false; > - > if (request->batch_obj) > offset = i915_gem_obj_offset(request->batch_obj, > request_to_vm(request)); > > - if (ring->hangcheck.action != HANGCHECK_WAIT && > + if (guilty && > i915_request_guilty(request, acthd, &inside)) { > DRM_DEBUG("%s hung %s bo (0x%lx ctx %d) at 0x%x\n", > ring->name, > @@ -2343,8 +2340,6 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring, > offset, > request->ctx ? 
request->ctx->id : 0, > acthd); > - > - guilty = true; > } > > /* If contexts are disabled or this is the default context, use > @@ -2383,12 +2378,18 @@ static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, > u32 completed_seqno = ring->get_seqno(ring, false); > u32 acthd = intel_ring_get_active_head(ring); > struct drm_i915_gem_request *request; > + bool guilty = false; > > list_for_each_entry(request, &ring->request_list, list) { > if (i915_seqno_passed(completed_seqno, request->seqno)) > continue; > > - i915_set_reset_status(ring, request, acthd); > + if (!guilty && ring->hangcheck.score >= HANGCHECK_SCORE_GUILTY) { > + guilty = true; > + i915_set_reset_status(ring, request, acthd, true); > + } else { > + i915_set_reset_status(ring, request, acthd, false); > + } > } > } > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index 6d11e25..e24f9ef 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -2473,7 +2473,6 @@ static void i915_hangcheck_elapsed(unsigned long data) > #define BUSY 1 > #define KICK 5 > #define HUNG 20 > -#define FIRE 30 > > if (!i915_enable_hangcheck) > return; > @@ -2557,7 +2556,7 @@ static void i915_hangcheck_elapsed(unsigned long data) > } > > for_each_ring(ring, dev_priv, i) { > - if (ring->hangcheck.score > FIRE) { > + if (ring->hangcheck.score >= HANGCHECK_SCORE_GUILTY) { > DRM_INFO("%s on %s\n", > stuck[i] ? 
"stuck" : "no progress", > ring->name); > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 71a73f4..6018793 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -41,6 +41,8 @@ enum intel_ring_hangcheck_action { > HANGCHECK_HUNG, > }; > > +#define HANGCHECK_SCORE_GUILTY 31 > + > struct intel_ring_hangcheck { > bool deadlock; > u32 seqno; > -- > 1.7.9.5 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx