On Thu, Jan 23, 2014 at 09:49:43PM +0000, Chris Wilson wrote: > Currently we report through our error state only the rings that have > been initialised (as detected by ring->obj). This check is done after > the GPU reset and ring re-initialisation, which means that the software > state may not be the same as when we captured the hardware error and we > may not print out any of the vital information for debugging the hang. > > This (and the implied object leak) is a regression from > > commit 3d57e5bd1284f44e325f3a52d966259ed42f9e05 > Author: Ben Widawsky <ben@xxxxxxxxxxxx> > Date: Mon Oct 14 10:01:36 2013 -0700 > > drm/i915: Do a fuller init after reset > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Ben Widawsky <ben@xxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 1 + > drivers/gpu/drm/i915/i915_gpu_error.c | 19 +++++++++++++------ > 2 files changed, 14 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index c45cbbecd66a..64a1aca7804d 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -334,6 +334,7 @@ struct drm_i915_error_state { > struct timeval time; > > struct drm_i915_error_ring { > + int valid; bool > struct drm_i915_error_object { > int page_count; > u32 gtt_offset; > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c > index 260a215e3619..e2af1d490f8d 100644 > --- a/drivers/gpu/drm/i915/i915_gpu_error.c > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c > @@ -240,6 +240,9 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m, > unsigned ring) > { > BUG_ON(ring >= I915_NUM_RINGS); /* shut up confused gcc */ > + if (!error->ring[ring].valid) > + return; > + > err_printf(m, "%s command stream:\n", ring_str(ring)); > err_printf(m, " HEAD: 0x%08x\n", error->head[ring]); > err_printf(m, " TAIL: 0x%08x\n", error->tail[ring]); > @@ -294,7 +297,6 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m, > struct drm_device *dev = error_priv->dev; > drm_i915_private_t *dev_priv = dev->dev_private; > struct drm_i915_error_state *error = error_priv->error; > - struct intel_ring_buffer *ring; > int i, j, page, offset, elt; > > if (!error) { > @@ -329,7 +331,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, > if (INTEL_INFO(dev)->gen == 7) > err_printf(m, "ERR_INT: 0x%08x\n", error->err_int); > > - for_each_ring(ring, dev_priv, i) > + for (i = 0; i < ARRAY_SIZE(error->ring); i++) > i915_ring_error_state(m, dev, error, i); > > for (i = 0; i < error->vm_count; i++) { > @@ -388,8 +390,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, > } > } > > - obj = error->ring[i].ctx; > - if (obj) { > + if ((obj = error->ring[i].ctx)) { Unrelated change. Although it does make this more consistent with the surrounding code. But I admit to not being a fan of assignments inside if statements. > err_printf(m, "%s --- HW Context = 0x%08x\n", > dev_priv->ring[i].name, > obj->gtt_offset); > @@ -826,11 +827,17 @@ static void i915_gem_record_rings(struct drm_device *dev, > struct drm_i915_error_state *error) > { > struct drm_i915_private *dev_priv = dev->dev_private; > - struct intel_ring_buffer *ring; > struct drm_i915_gem_request *request; > int i, count; > > - for_each_ring(ring, dev_priv, i) { > + for (i = 0; i < I915_NUM_RINGS; i++) { > + struct intel_ring_buffer *ring = &dev_priv->ring[i]; > + > + if (ring->dev == NULL) > + continue; > + > + error->ring[i].valid = true; > + The code here runs before the reset, and it would actually oops if ring->obj==NULL, so using for_each_ring() here looks appropriate. 
> i915_record_ring_state(dev, error, ring); > > error->ring[i].batchbuffer = > -- > 1.8.5.3 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Ville Syrjälä Intel OTC _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx