On 10/30/2013 06:44 AM, Mika Kuoppala wrote:
> This ioctl returns reset stats for the specified context.
>
> The struct returned contains context loss counters.
>
> reset_count: all resets across all contexts
> batch_active: active batches lost on resets
> batch_pending: pending batches lost on resets
>
> v2: get rid of state tracking completely and deliver only counts. Idea
>     from Chris Wilson.
>
> v3: fix commit message
>
> v4: default context handled inside i915_gem_context_get_hang_stats
>
> v5: reset_count only for privileged process
>
> v6: ctx=0 needs CAP_SYS_ADMIN for batch_* counters (Chris Wilson)
>
> v7: context hang stats never returns NULL
>
> v8: rebased on top of reworked context hang stats
>     DRM_RENDER_ALLOW for ioctl
>
> v9: use DEFAULT_CONTEXT_ID. Improve comments for ioctl struct members
>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
> Cc: Ian Romanick <idr@xxxxxxxxxxxxxxx>
> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_dma.c     |  1 +
>  drivers/gpu/drm/i915/i915_drv.h     |  2 ++
>  drivers/gpu/drm/i915/intel_uncore.c | 34 ++++++++++++++++++++++++++++++++++
>  include/uapi/drm/i915_drm.h         | 19 +++++++++++++++++++
>  4 files changed, 56 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 6eecce7..f2cdeb2 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1921,6 +1921,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
>  	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  };
>
>  int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 9fd716d..8870804 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2369,6 +2369,8 @@ extern int intel_enable_rc6(const struct drm_device *dev);
>  extern bool i915_semaphore_is_enabled(struct drm_device *dev);
>  int i915_reg_read_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *file);
> +int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
> +			       struct drm_file *file);
>
>  /* overlay */
>  extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index f6fae35..21cf951 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -633,6 +633,40 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>  	return 0;
>  }
>
> +int i915_get_reset_stats_ioctl(struct drm_device *dev,
> +			       void *data, struct drm_file *file)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_reset_stats *args = data;
> +	struct i915_ctx_hang_stats *hs;
> +	int ret;
> +
> +	if (args->ctx_id == DEFAULT_CONTEXT_ID && !capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	ret = mutex_lock_interruptible(&dev->struct_mutex);
> +	if (ret)
> +		return ret;
> +
> +	hs = i915_gem_context_get_hang_stats(dev, file, args->ctx_id);
> +	if (IS_ERR(hs)) {
> +		mutex_unlock(&dev->struct_mutex);
> +		return PTR_ERR(hs);
> +	}
> +
> +	if (capable(CAP_SYS_ADMIN))
> +		args->reset_count = i915_reset_count(&dev_priv->gpu_error);
> +	else
> +		args->reset_count = 0;

We're having some additional debate about issues related to this. Eric (added to CC so he'll notice) believes that we may encounter memory corruption around a reset (most likely the corruption causing the reset rather than the other way around). This means that we may need to deliver a reset notification to an otherwise unaffected GL context after all. :(

If we decide that this is possible, we should deliver a single bit to user mode that says "there was a reset after this context was created." I assume that could be returned to user space in the flags field? I don't think this provides the same potential information leak as directly exposing the global reset count, but I could be wrong.

I don't think we need to change anything /yet/, but we may need to soon.
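To make that concrete, here is roughly what I'd expect the user space side to look like. This is only a sketch: the helper name, the include path, and the I915_RESET_SINCE_CTX_CREATE bit (the hypothetical "there was a reset after this context was created" flag suggested above) are invented for illustration; only the ioctl and the struct come from this patch. Real code would also want drmIoctl() so the call restarts on EINTR.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>

#include <i915_drm.h>	/* include path may vary with libdrm */

#define I915_RESET_SINCE_CTX_CREATE	(1 << 0)	/* hypothetical, not in this patch */

/* Returns 1 if this context lost batches in a reset (or, with the
 * hypothetical bit above, if any reset happened after the context
 * was created), 0 if unaffected, -1 on ioctl error. */
static int context_was_reset(int fd, uint32_t ctx_id)
{
	struct drm_i915_reset_stats stats;

	memset(&stats, 0, sizeof(stats));
	stats.ctx_id = ctx_id;

	if (ioctl(fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats) != 0)
		return -1;

	if (stats.batch_active || stats.batch_pending)
		return 1;

	return !!(stats.flags & I915_RESET_SINCE_CTX_CREATE);
}

Something like that is more or less what the GL_ARB_robustness reset notification path in Mesa would need.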
> +
> +	args->batch_active = hs->batch_active;
> +	args->batch_pending = hs->batch_pending;
> +
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	return 0;
> +}
> +
>  static int i965_reset_complete(struct drm_device *dev)
>  {
>  	u8 gdrst;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 3a4e97b..52aed89 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -222,6 +222,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_GEM_SET_CACHING	0x2f
>  #define DRM_I915_GEM_GET_CACHING	0x30
>  #define DRM_I915_REG_READ		0x31
> +#define DRM_I915_GET_RESET_STATS	0x32
>
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -271,6 +272,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
>  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>  #define DRM_IOCTL_I915_REG_READ		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
> +#define DRM_IOCTL_I915_GET_RESET_STATS	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
>
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -1030,4 +1032,21 @@ struct drm_i915_reg_read {
>  	__u64 offset;
>  	__u64 val; /* Return value */
>  };
> +
> +struct drm_i915_reset_stats {
> +	__u32 ctx_id;
> +	__u32 flags;
> +
> +	/* All resets since boot/module reload, for all contexts */
> +	__u32 reset_count;
> +
> +	/* Number of batches lost when active in GPU, for this context */
> +	__u32 batch_active;
> +
> +	/* Number of batches lost pending for execution, for this context */
> +	__u32 batch_pending;
> +
> +	__u32 pad;
> +};
> +
>  #endif /* _UAPI_I915_DRM_H_ */
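For the record, the kernel side of that creation-time bit would be cheap. A hypothetical sketch, reusing the invented I915_RESET_SINCE_CTX_CREATE name from above and assuming the context snapshots i915_reset_count() when it is created (reset_count_at_create does not exist in this patch):

/* Hypothetical: compare the global reset count against a snapshot
 * taken at context create time; set the bit iff a reset happened in
 * between. */
static u32 i915_ctx_reset_flags(struct drm_i915_private *dev_priv,
				u32 reset_count_at_create)
{
	u32 flags = 0;

	if (i915_reset_count(&dev_priv->gpu_error) != reset_count_at_create)
		flags |= I915_RESET_SINCE_CTX_CREATE;

	return flags;
}

That would keep the global count itself hidden from unprivileged processes while still exposing the single bit they actually need.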