Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes:

> Only declare a missed interrupt if we find that the GPU is idle with
> waiters and a hangcheck interval has passed in which no new user
> interrupts have been raised.
>
> v2: Clear the stuck interrupt marker between successful batches
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>

Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c     | 11 +++++++----
>  drivers/gpu/drm/i915/i915_irq.c         | 10 +++++++++-
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  2 ++
>  3 files changed, 18 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index c4df580ed0de..f3ba97ad3e00 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -730,10 +730,10 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
>  static void i915_ring_seqno_info(struct seq_file *m,
>  				 struct intel_engine_cs *ring)
>  {
> -	if (ring->get_seqno) {
> -		seq_printf(m, "Current sequence (%s): %x\n",
> -			   ring->name, ring->get_seqno(ring));
> -	}
> +	seq_printf(m, "Current sequence (%s): %x\n",
> +		   ring->name, ring->get_seqno(ring));
> +	seq_printf(m, "Current user interrupts (%s): %x\n",
> +		   ring->name, READ_ONCE(ring->user_interrupts));
>  }
>
>  static int i915_gem_seqno_info(struct seq_file *m, void *data)
> @@ -1361,6 +1361,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  		seq_printf(m, "%s:\n", ring->name);
>  		seq_printf(m, "\tseqno = %x [current %x]\n",
>  			   ring->hangcheck.seqno, seqno[i]);
> +		seq_printf(m, "\tuser interrupts = %x [current %x]\n",
> +			   ring->hangcheck.user_interrupts,
> +			   ring->user_interrupts);
>  		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
>  			   (long long)ring->hangcheck.acthd,
>  			   (long long)acthd[i]);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 07bc2cdd6252..c0aeff607130 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1000,6 +1000,7 @@ static void notify_ring(struct intel_engine_cs *ring)
>  		return;
>
>  	trace_i915_gem_request_notify(ring);
> +	ring->user_interrupts++;
>
>  	wake_up_all(&ring->irq_queue);
>  }
> @@ -3097,6 +3098,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>  	for_each_ring(ring, dev_priv, i) {
>  		u64 acthd;
>  		u32 seqno;
> +		unsigned user_interrupts;
>  		bool busy = true;
>
>  		semaphore_clear_deadlocks(dev_priv);
> @@ -3113,6 +3115,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>
>  		acthd = intel_ring_get_active_head(ring);
>  		seqno = ring->get_seqno(ring);
> +		user_interrupts = READ_ONCE(ring->user_interrupts);
>
>  		if (ring->hangcheck.seqno == seqno) {
>  			if (ring_idle(ring, seqno)) {
> @@ -3120,7 +3123,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>
>  				if (waitqueue_active(&ring->irq_queue)) {
>  					/* Issue a wake-up to catch stuck h/w. */
> -					if (!test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
> +					if (ring->hangcheck.user_interrupts == user_interrupts &&
> +					    !test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
>  						if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)))
>  							DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
>  								  ring->name);
> @@ -3183,10 +3187,14 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>
>  			memset(ring->hangcheck.instdone, 0,
>  			       sizeof(ring->hangcheck.instdone));
> +
> +			/* Reset stuck interrupts between batch advances */
> +			user_interrupts = 0;
>  		}
>
>  		ring->hangcheck.seqno = seqno;
>  		ring->hangcheck.acthd = acthd;
> +		ring->hangcheck.user_interrupts = user_interrupts;
>  		busy_count += busy;
>  	}
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 4cea04491392..dfb14bfe5bc8 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -90,6 +90,7 @@ struct intel_ring_hangcheck {
>  	u64 acthd;
>  	u64 max_acthd;
>  	u32 seqno;
> +	unsigned user_interrupts;
>  	int score;
>  	enum intel_ring_hangcheck_action action;
>  	int deadlock;
> @@ -306,6 +307,7 @@ struct intel_engine_cs {
>  	 * inspecting request list.
>  	 */
>  	u32 last_submitted_seqno;
> +	unsigned user_interrupts;
>
>  	bool gpu_caches_dirty;
>
> --
> 2.7.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx