On Mon, May 13, 2013 at 04:32:10PM +0300, Mika Kuoppala wrote: > Instead of relying on acthd, track ring seqno progression > to detect if ring has hung. > > v2: put hangcheck stuff inside struct (Chris Wilson) > > Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 2 -- > drivers/gpu/drm/i915/i915_irq.c | 30 +++++++++++++----------------- > drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++++++ > 3 files changed, 19 insertions(+), 19 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 14817de..db7cda9 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -834,8 +834,6 @@ struct i915_gpu_error { > #define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) > struct timer_list hangcheck_timer; > int hangcheck_count; > - uint32_t last_acthd[I915_NUM_RINGS]; > - uint32_t prev_instdone[I915_NUM_INSTDONE_REG]; > > /* For reset and error_state handling. 
*/ > spinlock_t lock; > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index 0e5c9b0..004ad34 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -2384,22 +2384,19 @@ void i915_hangcheck_elapsed(unsigned long data) > { > struct drm_device *dev = (struct drm_device *)data; > drm_i915_private_t *dev_priv = dev->dev_private; > - uint32_t acthd[I915_NUM_RINGS], instdone[I915_NUM_INSTDONE_REG]; > struct intel_ring_buffer *ring; > bool err = false, idle; > int i; > + u32 seqno[I915_NUM_RINGS]; > + bool work_done; > > if (!i915_enable_hangcheck) > return; > > - memset(acthd, 0, sizeof(acthd)); > idle = true; > for_each_ring(ring, dev_priv, i) { > - u32 seqno; > - > - seqno = ring->get_seqno(ring, false); > - idle &= i915_hangcheck_ring_idle(ring, seqno, &err); > - acthd[i] = intel_ring_get_active_head(ring); > + seqno[i] = ring->get_seqno(ring, false); > + idle &= i915_hangcheck_ring_idle(ring, seqno[i], &err); > } > > /* If all work is done then ACTHD clearly hasn't advanced. 
*/ > @@ -2415,20 +2412,19 @@ void i915_hangcheck_elapsed(unsigned long data) > return; > } > > - i915_get_extra_instdone(dev, instdone); > - if (memcmp(dev_priv->gpu_error.last_acthd, acthd, > - sizeof(acthd)) == 0 && > - memcmp(dev_priv->gpu_error.prev_instdone, instdone, > - sizeof(instdone)) == 0) { > + work_done = false; > + for_each_ring(ring, dev_priv, i) { > + if (ring->hangcheck.seqno != seqno[i]) { > + work_done = true; > + ring->hangcheck.seqno = seqno[i]; > + } > + } > + > + if (!work_done) { > if (i915_hangcheck_hung(dev)) > return; > } else { > dev_priv->gpu_error.hangcheck_count = 0; > - > - memcpy(dev_priv->gpu_error.last_acthd, acthd, > - sizeof(acthd)); > - memcpy(dev_priv->gpu_error.prev_instdone, instdone, > - sizeof(instdone)); > } > > repeat: > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index dac1614..ef374a8 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -37,6 +37,10 @@ struct intel_hw_status_page { > #define I915_READ_SYNC_0(ring) I915_READ(RING_SYNC_0((ring)->mmio_base)) > #define I915_READ_SYNC_1(ring) I915_READ(RING_SYNC_1((ring)->mmio_base)) > > +struct intel_ring_hangcheck { > + u32 seqno; > +}; > + Shouldn't you initialize this thing in i915_gem_init_seqno()? > struct intel_ring_buffer { > const char *name; > enum intel_ring_id { > @@ -137,6 +141,8 @@ struct intel_ring_buffer { > struct i915_hw_context *default_context; > struct i915_hw_context *last_context; > > + struct intel_ring_hangcheck hangcheck; > + > void *private; > }; > > -- > 1.7.9.5 > -- Ben Widawsky, Intel Open Source Technology Center