>From b6c143ee1f0b65053186be2602f876cc4fd944ab Mon Sep 17 00:00:00 2001 Message-Id: <b6c143ee1f0b65053186be2602f876cc4fd944ab.1387201899.git.ian.lister@xxxxxxxxx> In-Reply-To: <cover.1387201899.git.ian.lister@xxxxxxxxx> References: <cover.1387201899.git.ian.lister@xxxxxxxxx> From: ian-lister <ian.lister@xxxxxxxxx> Date: Tue, 10 Dec 2013 15:01:55 +0000 Subject: [RFC 08/13] drm/i915: TDR loose ends Added check for HANGCHECK_IDLE in i915_set_reset_status as a request should not be guilty if the ring is recorded as idle. Cancel the gpu_error.work item on calling i915_suspend. Take gpu_error.lock when clearing wedged condition in entervt_ioctl. Signed-off-by: ian-lister <ian.lister@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_dma.c | 2 ++ drivers/gpu/drm/i915/i915_drv.c | 46 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 15 +++++++++++ drivers/gpu/drm/i915/i915_gem.c | 14 +++++++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 5 files changed, 74 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index e17a5d20..d45e936 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1661,6 +1661,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) acpi_video_register(); } + i915_init_watchdog(dev); + if (IS_GEN5(dev)) intel_gpu_ips_init(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 38cd27d..42ccfee 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -154,6 +154,11 @@ module_param_named(prefault_disable, i915_prefault_disable, bool, 0600); MODULE_PARM_DESC(prefault_disable, "Disable page prefaulting for pread/pwrite/reloc (default:false). For developers only."); +int i915_enable_watchdog __read_mostly = 1; +module_param_named(i915_enable_watchdog, i915_enable_watchdog, int, 0644); +MODULE_PARM_DESC(i915_enable_watchdog, + "Enable watchdog timers (default: true)"); + static struct drm_driver driver; #if IS_ENABLED(CONFIG_AGP_INTEL) extern int intel_agp_enabled; @@ -736,6 +741,47 @@ int i915_resume(struct drm_device *dev) return 0; } +void i915_init_watchdog(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring; + int freq; + int i; + u32 tout; + + /* Based on pre-defined time out value (60ms or 30ms) calculate + * timer count thresholds needed based on core frequency. + * + * For RCS: + * The timestamp resolution changed in Gen7 and beyond to 80ns + * for all pipes. Before that it was 640ns. + */ + if (INTEL_INFO(dev)->gen >= 7) + freq = KM_TIMESTAMP_CNTS_PER_SEC_80NS; + else + freq = KM_TIMESTAMP_CNTS_PER_SEC_640NS; + + for_each_ring(ring, dev_priv, i) { + switch (ring->id) { + case RCS: + tout = KM_MEDIA_ENGINE_TIMEOUT_VALUE_IN_MS; + break; + case VCS: + tout = KM_BSD_ENGINE_TIMEOUT_VALUE_IN_MS; + break; + default: + tout = 0; + break; + } + + ring->hangcheck.watchdog_threshold = + (tout * (freq / KM_TIMER_MILLISECOND)); + + DRM_DEBUG_TDR("%s watchdog threshold = %d\n", + ring->name, ring->hangcheck.watchdog_threshold); + } +} + /** * i915_reset - reset chip after a hang * @dev: drm device to reset diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1087e4e..6e379f1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1893,6 +1893,19 @@ struct drm_i915_file_private { #define GT_FREQUENCY_MULTIPLIER 50 +#define KM_MEDIA_ENGINE_TIMEOUT_VALUE_IN_MS 60 +#define KM_BSD_ENGINE_TIMEOUT_VALUE_IN_MS 60 +#define KM_TIMER_MILLISECOND 1000 + +/* Timestamp timer resolution = 0.080 uSec, or 12500000 counts per second*/ +#define KM_TIMESTAMP_CNTS_PER_SEC_80NS 12500000 + +/* Timestamp timer resolution = 0.640 uSec, or 1562500 counts per second*/ +#define KM_TIMESTAMP_CNTS_PER_SEC_640NS 1562500 + +#define KM_TIMER_MHZ 1000000 +#define KM_CD_CLK_FREQ (450 * KM_TIMER_MHZ) + #include "i915_trace.h" extern const struct drm_ioctl_desc i915_ioctls[]; @@ -1908,6 +1921,7 @@ extern int i915_vbt_sdvo_panel_type __read_mostly; extern int i915_enable_rc6 __read_mostly; extern int i915_enable_fbc __read_mostly; extern bool i915_enable_hangcheck __read_mostly; +extern int i915_enable_watchdog __read_mostly; extern int i915_enable_ppgtt __read_mostly; extern int i915_enable_psr __read_mostly; extern unsigned int i915_preliminary_hw_support __read_mostly; @@ -1942,6 +1956,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd, extern int i915_emit_box(struct drm_device *dev, struct drm_clip_rect *box, int DR1, int DR4); +extern void i915_init_watchdog(struct drm_device *dev); extern int intel_gpu_reset(struct drm_device *dev); extern int intel_gpu_engine_reset(struct drm_device *dev, enum intel_ring_id engine); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5b76da4..a762444 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2251,7 +2251,7 @@ static bool i915_request_guilty(struct drm_i915_gem_request *request, if (i915_head_inside_object(acthd, request->batch_obj, request_to_vm(request))) { *inside = true; - DRM_DEBUG_TDR("Head in obj 0x%08x\n", + DRM_DEBUG_TDR("Head in obj %p\n", request->batch_obj); return true; } @@ -2259,7 +2259,7 @@ static bool i915_request_guilty(struct drm_i915_gem_request *request, if (i915_head_inside_request(acthd, request->head, request->tail)) { *inside = false; - DRM_DEBUG_TDR("Head in req 0x%08x (HD: 0x%08x TL: 0x%08x)\n", + DRM_DEBUG_TDR("Head in req %p (HD: 0x%08x TL: 0x%08x)\n", request, request->head, request->tail); return true; } @@ -2297,8 +2297,9 @@ void i915_set_reset_status(struct intel_ring_buffer *ring, offset = i915_gem_obj_offset(request->batch_obj, request_to_vm(request)); - if (ring->hangcheck.action != HANGCHECK_WAIT && - i915_request_guilty(request, acthd, &inside)) { + if (!((ring->hangcheck.action == HANGCHECK_WAIT) + || (ring->hangcheck.action == HANGCHECK_IDLE)) + && i915_request_guilty(request, acthd, &inside)) { DRM_ERROR("%s hung %s bo (0x%lx ctx %d) at 0x%x\n", ring->name, inside ? "inside" : "flushing", @@ -4300,6 +4301,7 @@ i915_gem_suspend(struct drm_device *dev) del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); atomic_set(&dev_priv->gpu_error.active, 0); + cancel_work_sync(&dev_priv->gpu_error.work); cancel_delayed_work_sync(&dev_priv->mm.retire_work); cancel_delayed_work_sync(&dev_priv->mm.idle_work); @@ -4527,6 +4529,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long irqflags; int ret; if (drm_core_check_feature(dev, DRIVER_MODESET)) @@ -4534,7 +4537,10 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, if (i915_reset_in_progress(&dev_priv->gpu_error)) { DRM_ERROR("Reenabling wedged hardware, good luck\n"); + spin_lock_irqsave(&dev_priv->gpu_error.lock, irqflags); + dev_priv->gpu_error.reset_requests = 0; atomic_set(&dev_priv->gpu_error.reset_counter, 0); + spin_unlock_irqrestore(&dev_priv->gpu_error.lock, irqflags); } mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 7235720..c3d6cbb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -69,6 +69,7 @@ struct intel_ring_hangcheck { u32 total; /* Total resets applied to this ring/engine*/ u32 last_head; /* Head value recorded at last hang */ u32 status_updated; + u32 watchdog_threshold; }; struct intel_ring_buffer { -- 1.8.5.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx