On 03/14/2013 08:52 AM, Mika Kuoppala wrote: > To count context losses, add struct ctx_reset_state for > both i915_hw_context and drm_i915_file_private. > drm_i915_file_private is used when there is no context. > > Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com> > --- > drivers/gpu/drm/i915/i915_dma.c | 4 +++- > drivers/gpu/drm/i915/i915_drv.h | 19 +++++++++++++++++++ > drivers/gpu/drm/i915/i915_gem_context.c | 11 +++++++++++ > 3 files changed, 33 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index e16099b..7902d97 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -1792,7 +1792,7 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file) > struct drm_i915_file_private *file_priv; > > DRM_DEBUG_DRIVER("\n"); > - file_priv = kmalloc(sizeof(*file_priv), GFP_KERNEL); > + file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); > if (!file_priv) > return -ENOMEM; > > @@ -1801,6 +1801,8 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file) > spin_lock_init(&file_priv->mm.lock); > INIT_LIST_HEAD(&file_priv->mm.request_list); > > + i915_gem_context_init_reset_state(dev, &file_priv->reset_state); > + > idr_init(&file_priv->context_idr); > > return 0; > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index a54c507..d004548 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -433,6 +433,19 @@ struct i915_hw_ppgtt { > void (*cleanup)(struct i915_hw_ppgtt *ppgtt); > }; > > +struct ctx_reset_state { > + /* guilty and reset counts when context initialized */ > + unsigned long guilty_cnt; > + unsigned long reset_cnt; I think we can afford to spell out "count." The first time I saw cnt, it looked like a dirty word. :) I think this structure could use some better description of the overall architecture. It's not completely obvious from the individual pieces... 
and that makes it really hard to evaluate. reset_cnt is the number of resets since start-up. What is guilty_cnt? What are innocent and guilty (below)? All of this makes it difficult for me to tell whether or not the logic in patch 16 is correct... and I don't think it is. > + > + unsigned innocent; > + unsigned guilty; > + /* Time when this context was last blamed for a GPU reset. */ > + unsigned long last_guilty_reset; > + > + /* banned to submit more work */ > + bool banned; > +}; > > /* This must match up with the value previously used for execbuf2.rsvd1. */ > #define DEFAULT_CONTEXT_ID 0 > @@ -443,6 +456,7 @@ struct i915_hw_context { > struct drm_i915_file_private *file_priv; > struct intel_ring_buffer *ring; > struct drm_i915_gem_object *obj; > + struct ctx_reset_state reset_state; > }; > > enum no_fbc_reason { > @@ -805,6 +819,7 @@ struct i915_gpu_error { > > unsigned long last_reset; > > + unsigned long guilty_cnt; > /** > * State variable and reset counter controlling the reset flow > * > @@ -1257,6 +1272,8 @@ struct drm_i915_file_private { > struct list_head request_list; > } mm; > struct idr context_idr; > + > + struct ctx_reset_state reset_state; > }; > > #define INTEL_INFO(dev) (((struct drm_i915_private *) (dev)->dev_private)->info) > @@ -1677,6 +1694,8 @@ struct i915_hw_context * __must_check > i915_switch_context(struct intel_ring_buffer *ring, > struct drm_file *file, int to_id); > void i915_gem_context_free(struct kref *ctx_ref); > +void i915_gem_context_init_reset_state(struct drm_device *dev, > + struct ctx_reset_state *rs); > int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, > struct drm_file *file); > int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index 8fb4d3c..dbd14b8 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -145,6 +145,15 @@ static 
void do_destroy(struct i915_hw_context *ctx) > kfree(ctx); > } > > +void i915_gem_context_init_reset_state(struct drm_device *dev, > + struct ctx_reset_state *rs) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + > + rs->reset_cnt = atomic_read(&dev_priv->gpu_error.reset_counter); > + rs->guilty_cnt = dev_priv->gpu_error.guilty_cnt; > +} > + > static struct i915_hw_context * > create_hw_context(struct drm_device *dev, > struct drm_i915_file_private *file_priv) > @@ -177,6 +186,8 @@ create_hw_context(struct drm_device *dev, > > ctx->file_priv = file_priv; > > + i915_gem_context_init_reset_state(dev, &ctx->reset_state); > + > again: > if (idr_pre_get(&file_priv->context_idr, GFP_KERNEL) == 0) { > ret = -ENOMEM; >