This ioctl returns reset status for specified context. Returned status is context loss state with regards to previous call to this ioctl. It can one off: no_error : no context lost detected guilty : context was lost and this context was one submitting a batch which hung the gpu innocent : context was lost but this context fell victim to some other context which submitted batch which hung the gpu v2: get rid of state tracking completely and deliver only counts. Idea from Chris Wilson. Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com> --- drivers/gpu/drm/i915/i915_dma.c | 1 + drivers/gpu/drm/i915/i915_drv.c | 42 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ include/uapi/drm/i915_drm.h | 19 ++++++++++++++++++ 4 files changed, 68 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 82732ee..00b6765 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1882,6 +1882,7 @@ struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED), }; int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d159d7a..67b023a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -823,6 +823,9 @@ int i915_reset(struct drm_device *dev) i915_gem_reset(dev); + /* Count unsuccessful ones */ + dev_priv->reset_count++; + ret = -ENODEV; if (get_seconds() - dev_priv->gpu_error.last_reset < 5) DRM_ERROR("GPU hanging too fast, declaring wedged!\n"); @@ -1228,3 +1231,42 @@ int i915_reg_read_ioctl(struct drm_device *dev, return 0; } + +int i915_get_reset_stats_ioctl(struct drm_device *dev, + void *data, struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring; + struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_reset_stats *args = data; + struct i915_reset_stats *rs = NULL; + int ret; + + ret = mutex_lock_interruptible(&dev->struct_mutex); + if (ret) + return ret; + + if (args->ctx_id == 0) { + rs = &file_priv->reset_stats; + ret = 0; + goto out; + } + + ring = &dev_priv->ring[RCS]; + + ret = i915_gem_context_get_reset_stats(ring, + file, + args->ctx_id, + &rs); +out: + if (rs && ret == 0) { + args->global_lost = dev_priv->reset_count; + args->total_lost = rs->total; + args->innocent_lost = rs->innocent; + args->guilty_lost = rs->guilty; + } + + mutex_unlock(&dev->struct_mutex); + + return ret ? -EINVAL : 0; +} diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 42fcfb6..f43a482 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1040,6 +1040,10 @@ typedef struct drm_i915_private { /* Old dri1 support infrastructure, beware the dragons ya fools entering * here! */ struct i915_dri1_state dri1; + + /* get_reset_stats ioctl */ + u32 reset_count; + } drm_i915_private_t; /* Iterate over initialised rings */ @@ -1832,6 +1836,8 @@ extern int intel_enable_rc6(const struct drm_device *dev); extern bool i915_semaphore_is_enabled(struct drm_device *dev); int i915_reg_read_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); /* overlay */ #ifdef CONFIG_DEBUG_FS diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 07d5941..8f4f5e2 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_SET_CACHING 0x2f #define DRM_I915_GEM_GET_CACHING 0x30 #define DRM_I915_REG_READ 0x31 +#define DRM_I915_GET_RESET_STATS 0x32 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -247,6 +248,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create) #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy) #define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read) +#define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -980,4 +982,21 @@ struct drm_i915_reg_read { __u64 offset; __u64 val; /* Return value */ }; + +struct drm_i915_reset_stats { + __u32 ctx_id; + __u32 flags; + + /* For all contexts */ + __u32 global_lost; + + /* For this context */ + __u32 total_lost; + __u32 innocent_lost; + __u32 guilty_lost; + + /* unknown_lost == + * total - (innocent + guilty) */ +}; + #endif /* _UAPI_I915_DRM_H_ */ -- 1.7.9.5