Watchdog timeout (or "media engine reset" as it is sometimes called, even though the render engine is also supported) is a feature that allows userland applications to enable hang detection on individual batch buffers. The detection mechanism itself is mostly bound to the hardware and the only thing that the driver needs to do to support this form of hang detection is to implement the interrupt handling support as well as watchdog instruction injection before and after the emitted batch buffer start instruction in the ring buffer. The principle of this hang detection mechanism is as follows: 1. Once the decision has been made to enable watchdog timeout for a particular batch buffer and the driver is in the process of emitting the batch buffer start instruction into the ring buffer, it also emits a watchdog timer start instruction before and a watchdog timer cancellation instruction after the batch buffer start instruction in the ring buffer. 2. Once the GPU execution reaches the watchdog timer start instruction, the hardware watchdog counter is started by the hardware. The counter keeps counting until it reaches a previously configured threshold value. 2a. If the counter reaches the threshold value the hardware fires a watchdog interrupt that is picked up by the watchdog interrupt service routine in this commit. This means that a hang has been detected and the driver needs to deal with it the same way it would deal with an engine hang detected by the periodic hang checker. The only difference between the two is that we never promote to a full GPU reset following a watchdog timeout, even if a per-engine reset was attempted too recently. Thus, the watchdog interrupt handler calls the error handler directly, passing the engine mask of the hung engine in question, which immediately results in a per-engine hang recovery being scheduled. 2b. 
If the batch buffer finishes executing and the execution reaches the watchdog cancellation instruction before the watchdog counter reaches its threshold value, the watchdog is cancelled and nothing more comes of it. No hang was detected. Currently watchdog timeout for the render engine and all available media engines is supported. The specifications allude to the VECS engine being supported but that is currently not supported by this commit. The current default watchdog threshold value is 60 ms, since this has been empirically determined to be a good compromise for low-latency requirements and low rate of false positives. NOTE: I don't know if Ben Widawsky had any part in this code from 3 years ago. There have been so many people involved in this already that I am in no position to know. If I've missed anyone's Signed-off-by line please let me know. Signed-off-by: Tomas Elf <tomas.elf@xxxxxxxxx> Signed-off-by: Arun Siluvery <arun.siluvery@xxxxxxxxx> Signed-off-by: Ian Lister <ian.lister@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_dma.c | 59 ++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 7 ++- drivers/gpu/drm/i915/i915_irq.c | 84 ++++++++++++++++++++++------ drivers/gpu/drm/i915/i915_reg.h | 7 +++ drivers/gpu/drm/i915/intel_lrc.c | 99 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 31 +++++++++++ include/uapi/drm/i915_drm.h | 5 +- 8 files changed, 271 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e33e105..a89da48 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4183,7 +4183,7 @@ i915_wedged_set(void *data, u64 val) intel_runtime_pm_get(dev_priv); - i915_handle_error(dev, 0x0, val, + i915_handle_error(dev, 0x0, false, val, "Manually setting wedged to %llu", val); intel_runtime_pm_put(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 
39b8f5f..bf1d45a 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -792,6 +792,64 @@ i915_hangcheck_init(struct drm_device *dev) } } +void i915_watchdog_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int freq; + int i; + + /* + * Based on pre-defined time out value (60ms or 30ms) calculate + * timer count thresholds needed based on core frequency. + * + * For RCS. + * The timestamp resolution changed in Gen7 and beyond to 80ns + * for all pipes. Before that it was 640ns. + */ + +#define KM_RCS_ENGINE_TIMEOUT_VALUE_IN_MS 60 +#define KM_BSD_ENGINE_TIMEOUT_VALUE_IN_MS 60 +#define KM_TIMER_MILLISECOND 1000 + + /* + * Timestamp timer resolution = 0.080 uSec, + * or 12500000 counts per second + */ +#define KM_TIMESTAMP_CNTS_PER_SEC_80NS 12500000 + + /* + * Timestamp timer resolution = 0.640 uSec, + * or 1562500 counts per second + */ +#define KM_TIMESTAMP_CNTS_PER_SEC_640NS 1562500 + + if (INTEL_INFO(dev)->gen >= 7) + freq = KM_TIMESTAMP_CNTS_PER_SEC_80NS; + else + freq = KM_TIMESTAMP_CNTS_PER_SEC_640NS; + + dev_priv->ring[RCS].watchdog_threshold = + ((KM_RCS_ENGINE_TIMEOUT_VALUE_IN_MS) * + (freq / KM_TIMER_MILLISECOND)); + + dev_priv->ring[VCS].watchdog_threshold = + ((KM_BSD_ENGINE_TIMEOUT_VALUE_IN_MS) * + (freq / KM_TIMER_MILLISECOND)); + + dev_priv->ring[VCS2].watchdog_threshold = + ((KM_BSD_ENGINE_TIMEOUT_VALUE_IN_MS) * + (freq / KM_TIMER_MILLISECOND)); + + for (i = 0; i < I915_NUM_RINGS; i++) + dev_priv->ring[i].hangcheck.watchdog_count = 0; + + DRM_INFO("Watchdog Timeout [ms], " \ + "RCS: 0x%08X, VCS: 0x%08X, VCS2: 0x%08X\n", \ + KM_RCS_ENGINE_TIMEOUT_VALUE_IN_MS, + KM_BSD_ENGINE_TIMEOUT_VALUE_IN_MS, + KM_BSD_ENGINE_TIMEOUT_VALUE_IN_MS); +} + /** * i915_driver_load - setup chip and create an initial config * @dev: DRM device @@ -973,6 +1031,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) i915_gem_load(dev); i915_hangcheck_init(dev); + i915_watchdog_init(dev); /* 
On the 945G/GM, the chipset reports the MSI capability on the * integrated graphics even though the support isn't actually there diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ef7c129..3d31872 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2563,6 +2563,7 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); void intel_hpd_cancel_work(struct drm_i915_private *dev_priv); +void i915_watchdog_init(struct drm_device *dev); static inline void i915_hangcheck_reinit(struct intel_engine_cs *engine) { struct intel_ring_hangcheck *hc = &engine->hangcheck; @@ -2578,9 +2579,9 @@ static inline void i915_hangcheck_reinit(struct intel_engine_cs *engine) /* i915_irq.c */ void i915_queue_hangcheck(struct drm_device *dev); -__printf(4, 5) -void i915_handle_error(struct drm_device *dev, u32 engine_mask, bool wedged, - const char *fmt, ...); +__printf(5, 6) +void i915_handle_error(struct drm_device *dev, u32 engine_mask, + bool watchdog, bool wedged, const char *fmt, ...); extern void intel_irq_init(struct drm_i915_private *dev_priv); extern void intel_hpd_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4973826..5672a2c 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1289,6 +1289,18 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv, intel_lrc_irq_handler(&dev_priv->ring[RCS]); if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT)) notify_ring(&dev_priv->ring[RCS]); + if (tmp & (GT_GEN8_RCS_WATCHDOG_INTERRUPT << GEN8_RCS_IRQ_SHIFT)) { + struct intel_engine_cs *ring; + + /* Stop the counter to prevent further interrupts */ + ring = &dev_priv->ring[RCS]; + I915_WRITE(RING_CNTR(ring->mmio_base), + 
GEN6_RCS_WATCHDOG_DISABLE); + + ring->hangcheck.watchdog_count++; + i915_handle_error(ring->dev, intel_ring_flag(ring), true, true, + "Render engine watchdog timed out"); + } if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT)) intel_lrc_irq_handler(&dev_priv->ring[BCS]); @@ -1308,11 +1320,35 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv, intel_lrc_irq_handler(&dev_priv->ring[VCS]); if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT)) notify_ring(&dev_priv->ring[VCS]); + if (tmp & (GT_GEN8_VCS_WATCHDOG_INTERRUPT << GEN8_VCS1_IRQ_SHIFT)) { + struct intel_engine_cs *ring; + + /* Stop the counter to prevent further interrupts */ + ring = &dev_priv->ring[VCS]; + I915_WRITE(RING_CNTR(ring->mmio_base), + GEN8_VCS_WATCHDOG_DISABLE); + + ring->hangcheck.watchdog_count++; + i915_handle_error(ring->dev, intel_ring_flag(ring), true, true, + "Media engine watchdog timed out"); + } if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT)) intel_lrc_irq_handler(&dev_priv->ring[VCS2]); if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT)) notify_ring(&dev_priv->ring[VCS2]); + if (tmp & (GT_GEN8_VCS_WATCHDOG_INTERRUPT << GEN8_VCS2_IRQ_SHIFT)) { + struct intel_engine_cs *ring; + + /* Stop the counter to prevent further interrupts */ + ring = &dev_priv->ring[VCS2]; + I915_WRITE(RING_CNTR(ring->mmio_base), + GEN8_VCS_WATCHDOG_DISABLE); + + ring->hangcheck.watchdog_count++; + i915_handle_error(ring->dev, intel_ring_flag(ring), true, true, + "Media engine 2 watchdog timed out"); + } } else DRM_ERROR("The master control interrupt lied (GT1)!\n"); } @@ -2573,6 +2609,7 @@ static void i915_report_and_clear_eir(struct drm_device *dev) * or if one of the current engine resets fails we fall * back to legacy full GPU reset. * + * @watchdog: true = Engine hang detected by hardware watchdog. * @wedged: true = Hang detected, invoke hang recovery. * @fmt, ...: Error message describing reason for error. 
* @@ -2584,8 +2621,8 @@ static void i915_report_and_clear_eir(struct drm_device *dev) * reset the associated engine. Failing that, try to fall back to legacy * full GPU reset recovery mode. */ -void i915_handle_error(struct drm_device *dev, u32 engine_mask, bool wedged, - const char *fmt, ...) +void i915_handle_error(struct drm_device *dev, u32 engine_mask, + bool watchdog, bool wedged, const char *fmt, ...) { struct drm_i915_private *dev_priv = dev->dev_private; va_list args; @@ -2617,20 +2654,27 @@ void i915_handle_error(struct drm_device *dev, u32 engine_mask, bool wedged, u32 i; for_each_ring(engine, dev_priv, i) { - u32 now, last_engine_reset_timediff; if (!(intel_ring_flag(engine) & engine_mask)) continue; - /* Measure the time since this engine was last reset */ - now = get_seconds(); - last_engine_reset_timediff = - now - engine->hangcheck.last_engine_reset_time; - - full_reset = last_engine_reset_timediff < - i915.gpu_reset_promotion_time; - - engine->hangcheck.last_engine_reset_time = now; + if (!watchdog) { + /* Measure the time since this engine was last reset */ + u32 now = get_seconds(); + u32 last_engine_reset_timediff = + now - engine->hangcheck.last_engine_reset_time; + + full_reset = last_engine_reset_timediff < + i915.gpu_reset_promotion_time; + + engine->hangcheck.last_engine_reset_time = now; + } else { + /* + * Watchdog timeout always results + * in engine reset. + */ + full_reset = false; + } /* * This engine was not reset too recently - go ahead @@ -2641,10 +2685,11 @@ void i915_handle_error(struct drm_device *dev, u32 engine_mask, bool wedged, * This can still be overridden by a global * reset e.g. if per-engine reset fails. 
*/ - if (!full_reset) + if (watchdog || !full_reset) atomic_set_mask(I915_ENGINE_RESET_IN_PROGRESS, &engine->hangcheck.flags); - else + + if (full_reset) break; } /* for_each_ring */ @@ -2652,7 +2697,7 @@ void i915_handle_error(struct drm_device *dev, u32 engine_mask, bool wedged, if (full_reset) { atomic_set_mask(I915_RESET_IN_PROGRESS_FLAG, - &dev_priv->gpu_error.reset_counter); + &dev_priv->gpu_error.reset_counter); } /* @@ -2990,7 +3035,7 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd) */ tmp = I915_READ_CTL(ring); if (tmp & RING_WAIT) { - i915_handle_error(dev, intel_ring_flag(ring), false, + i915_handle_error(dev, intel_ring_flag(ring), false, false, "Kicking stuck wait on %s", ring->name); I915_WRITE_CTL(ring, tmp); @@ -3002,7 +3047,7 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd) default: return HANGCHECK_HUNG; case 1: - i915_handle_error(dev, intel_ring_flag(ring), false, + i915_handle_error(dev, intel_ring_flag(ring), false, false, "Kicking stuck semaphore on %s", ring->name); I915_WRITE_CTL(ring, tmp); @@ -3135,7 +3180,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) } if (engine_mask) - i915_handle_error(dev, engine_mask, true, "Ring hung (0x%02x)", engine_mask); + i915_handle_error(dev, engine_mask, false, true, "Ring hung (0x%02x)", engine_mask); if (busy_count) /* Reset timer case chip hangs without another request @@ -3589,11 +3634,14 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) { /* These are interrupts we'll toggle with the ring mask register */ uint32_t gt_interrupts[] = { + GT_GEN8_RCS_WATCHDOG_INTERRUPT << GEN8_RCS_IRQ_SHIFT | GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT | GT_RENDER_L3_PARITY_ERROR_INTERRUPT | GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT | GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT, + GT_GEN8_VCS_WATCHDOG_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | + GT_GEN8_VCS_WATCHDOG_INTERRUPT << GEN8_VCS2_IRQ_SHIFT | 
GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT | diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index af9f0ad..d2adb9b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1181,6 +1181,8 @@ enum skl_disp_power_wells { #define RING_HEAD(base) ((base)+0x34) #define RING_START(base) ((base)+0x38) #define RING_CTL(base) ((base)+0x3c) +#define RING_CNTR(base) ((base)+0x178) +#define RING_THRESH(base) ((base)+0x17C) #define RING_SYNC_0(base) ((base)+0x40) #define RING_SYNC_1(base) ((base)+0x44) #define RING_SYNC_2(base) ((base)+0x48) @@ -1584,6 +1586,11 @@ enum skl_disp_power_wells { #define GT_BSD_USER_INTERRUPT (1 << 12) #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */ #define GT_CONTEXT_SWITCH_INTERRUPT (1 << 8) +#define GT_GEN6_RENDER_WATCHDOG_INTERRUPT (1 << 6) +#define GT_GEN8_RCS_WATCHDOG_INTERRUPT (1 << 6) +#define GEN6_RCS_WATCHDOG_DISABLE 1 +#define GT_GEN8_VCS_WATCHDOG_INTERRUPT (1 << 6) +#define GEN8_VCS_WATCHDOG_DISABLE 0xFFFFFFFF #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */ #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4) #define GT_RENDER_CS_MASTER_ERROR_INTERRUPT (1 << 3) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 4a19385..ff9d27cb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1005,6 +1005,78 @@ static int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, return 0; } +static int +gen8_ring_start_watchdog(struct intel_ringbuffer *ringbuf, struct intel_context *ctx) +{ + int ret; + struct intel_engine_cs *ring = ringbuf->ring; + + ret = intel_logical_ring_begin(ringbuf, ctx, 10); + if (ret) + return ret; + + /* + * i915_reg.h includes a warning to place a MI_NOOP + * before a MI_LOAD_REGISTER_IMM + */ + 
intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_emit(ringbuf, MI_NOOP); + + /* Set counter period */ + intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); + intel_logical_ring_emit(ringbuf, RING_THRESH(ring->mmio_base)); + intel_logical_ring_emit(ringbuf, ring->watchdog_threshold); + intel_logical_ring_emit(ringbuf, MI_NOOP); + + /* Start counter */ + intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); + intel_logical_ring_emit(ringbuf, RING_CNTR(ring->mmio_base)); + intel_logical_ring_emit(ringbuf, I915_WATCHDOG_ENABLE); + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_advance(ringbuf); + + return 0; +} + +static int +gen8_ring_stop_watchdog(struct intel_ringbuffer *ringbuf, struct intel_context *ctx) +{ + int ret; + struct intel_engine_cs *ring = ringbuf->ring; + + ret = intel_logical_ring_begin(ringbuf, ctx, 6); + if (ret) + return ret; + + /* + * i915_reg.h includes a warning to place a MI_NOOP + * before a MI_LOAD_REGISTER_IMM + */ + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_emit(ringbuf, MI_NOOP); + + intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); + intel_logical_ring_emit(ringbuf, RING_CNTR(ring->mmio_base)); + + switch (ring->id) { + default: + WARN(1, "%s does not support watchdog timeout! " \ + "Defaulting to render engine.\n", ring->name); + case RCS: + intel_logical_ring_emit(ringbuf, GEN6_RCS_WATCHDOG_DISABLE); + break; + case VCS: + case VCS2: + intel_logical_ring_emit(ringbuf, GEN8_VCS_WATCHDOG_DISABLE); + break; + } + + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_advance(ringbuf); + + return 0; +} + /** * execlists_submission() - submit a batchbuffer for execution, Execlists style * @dev: DRM device. 
@@ -1035,6 +1107,7 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, int instp_mode; u32 instp_mask; int ret; + bool watchdog_running = false; instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; instp_mask = I915_EXEC_CONSTANTS_MASK; @@ -1086,6 +1159,18 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, if (ret) return ret; + /* Start watchdog timer */ + if (args->flags & I915_EXEC_ENABLE_WATCHDOG) { + if (!intel_ring_supports_watchdog(ring)) + return -EINVAL; + + ret = gen8_ring_start_watchdog(ringbuf, ctx); + if (ret) + return ret; + + watchdog_running = true; + } + if (ring == &dev_priv->ring[RCS] && instp_mode != dev_priv->relative_constants_mode) { ret = intel_logical_ring_begin(ringbuf, ctx, 4); @@ -1107,6 +1192,13 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags); + /* Cancel watchdog timer */ + if (watchdog_running) { + ret = gen8_ring_stop_watchdog(ringbuf, ctx); + if (ret) + return ret; + } + i915_gem_execbuffer_move_to_active(vmas, ring); i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); @@ -1775,6 +1867,9 @@ static int logical_render_ring_init(struct drm_device *dev) if (HAS_L3_DPF(dev)) ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; + ring->irq_keep_mask |= + (GT_GEN8_RCS_WATCHDOG_INTERRUPT << GEN8_RCS_IRQ_SHIFT); + if (INTEL_INFO(dev)->gen >= 9) ring->init_hw = gen9_init_render_ring; else @@ -1813,6 +1908,8 @@ static int logical_bsd_ring_init(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; ring->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; + ring->irq_keep_mask |= + (GT_GEN8_VCS_WATCHDOG_INTERRUPT << GEN8_VCS1_IRQ_SHIFT); ring->init_hw = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; @@ -1842,6 +1939,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << 
GEN8_VCS2_IRQ_SHIFT; ring->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; + ring->irq_keep_mask |= + (GT_GEN8_VCS_WATCHDOG_INTERRUPT << GEN8_VCS2_IRQ_SHIFT); ring->init_hw = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 35360a4..9058789 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -30,6 +30,8 @@ struct intel_hw_status_page { struct drm_i915_gem_object *obj; }; +#define I915_WATCHDOG_ENABLE 0 + #define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base)) #define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val) @@ -136,6 +138,9 @@ struct intel_ring_hangcheck { /* Number of TDR hang detections */ u32 tdr_count; + + /* Number of watchdog hang detections for this ring */ + u32 watchdog_count; }; struct intel_ringbuffer { @@ -338,6 +343,12 @@ struct intel_engine_cs { /* Saved head value to be restored after reset */ u32 saved_head; + /* + * Watchdog timer threshold values + * only RCS, VCS, VCS2 rings have watchdog timeout support + */ + uint32_t watchdog_threshold; + struct { struct drm_i915_gem_object *obj; u32 gtt_offset; @@ -484,6 +495,26 @@ int intel_ring_save(struct intel_engine_cs *ring, int intel_ring_restore(struct intel_engine_cs *ring, struct drm_i915_gem_request *req); +static inline bool intel_ring_supports_watchdog(struct intel_engine_cs *ring) +{ + bool ret = false; + + if (WARN_ON(!ring)) + goto exit; + + ret = ( ring->id == RCS || + ring->id == VCS || + ring->id == VCS2); + + if (!ret) + DRM_ERROR("%s does not support watchdog timeout!\n", ring->name); + +exit: + return ret; +} +int intel_ring_start_watchdog(struct intel_engine_cs *ring); +int intel_ring_stop_watchdog(struct intel_engine_cs *ring); + int __must_check intel_ring_idle(struct intel_engine_cs *ring); void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno); int 
intel_ring_flush_all_caches(struct intel_engine_cs *ring); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 4851d66..f8af7d2 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -760,7 +760,10 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_BSD_RING1 (1<<13) #define I915_EXEC_BSD_RING2 (2<<13) -#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15) +/* Enable watchdog timer for this batch buffer */ +#define I915_EXEC_ENABLE_WATCHDOG (1<<15) + +#define __I915_EXEC_UNKNOWN_FLAGS -(1<<16) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx