From: Tomas Elf <tomas.elf@xxxxxxxxx> *** General *** Watchdog timeout (or "media engine reset") is a feature that allows userland applications to enable hang detection on individual batch buffers. The detection mechanism itself is mostly bound to the hardware and the only thing that the driver needs to do to support this form of hang detection is to implement the interrupt handling support as well as watchdog command emission before and after the emitted batch buffer start instruction in the ring buffer. The principle of the hang detection mechanism is as follows: 1. Once the decision has been made to enable watchdog timeout for a particular batch buffer and the driver is in the process of emitting the batch buffer start instruction into the ring buffer it also emits a watchdog timer start instruction before and a watchdog timer cancellation instruction after the batch buffer start instruction in the ring buffer. 2. Once the GPU execution reaches the watchdog timer start instruction the hardware watchdog counter is started by the hardware. The counter keeps counting until either reaching a previously configured threshold value or the timer cancellation instruction is executed. 2a. If the counter reaches the threshold value the hardware fires a watchdog interrupt that is picked up by the watchdog interrupt handler. This means that a hang has been detected and the driver needs to deal with it the same way it would deal with an engine hang detected by the periodic hang checker. The only difference between the two is that we never promote to a full GPU reset following a watchdog timeout in case a per-engine reset was attempted too recently. Thus, the watchdog interrupt handler calls the error handler directly, passing the engine mask of the hung engine in question, which immediately results in a per-engine hang recovery being scheduled. 2b. 
If the batch buffer completes and the execution reaches the watchdog cancellation instruction before the watchdog counter reaches its threshold value the watchdog is cancelled and nothing more comes of it. No hang is detected. *** This patch introduces: *** 1. Command emission into the ring buffer for starting and stopping the watchdog timer before/after batch buffer start during batch buffer submission. 2. Feature support query functions for verifying that the requested engine actually supports watchdog timeout and fails the batch buffer submission otherwise. NOTE: I don't know if Ben Widawsky had any part in this code from 3 years ago. There have been so many people involved in this already that I am in no position to know. If I've missed anyone's sob line please let me know. Signed-off-by: Tomas Elf <tomas.elf@xxxxxxxxx> Signed-off-by: Ian Lister <ian.lister@xxxxxxxxx> Signed-off-by: Arun Siluvery <arun.siluvery@xxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/intel_lrc.c | 99 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 22 ++++++++ 2 files changed, 121 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6efbcd7..43d424f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1095,6 +1095,80 @@ int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request) return intel_logical_ring_begin(request, 0); } +static int +gen8_ring_start_watchdog(struct drm_i915_gem_request *req) +{ + int ret; + struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_engine_cs *ring = ringbuf->ring; + + ret = intel_logical_ring_begin(req, 10); + if (ret) + return ret; + + /* + * i915_reg.h includes a warning to place a MI_NOOP + * before a MI_LOAD_REGISTER_IMM + */ + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_emit(ringbuf, MI_NOOP); + + /* Set counter period */ + intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); + 
intel_logical_ring_emit_reg(ringbuf, RING_THRESH(ring->mmio_base)); + intel_logical_ring_emit(ringbuf, ring->watchdog_threshold); + intel_logical_ring_emit(ringbuf, MI_NOOP); + + /* Start counter */ + intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); + intel_logical_ring_emit_reg(ringbuf, RING_CNTR(ring->mmio_base)); + intel_logical_ring_emit(ringbuf, I915_WATCHDOG_ENABLE); + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_advance(ringbuf); + + return 0; +} + +static int +gen8_ring_stop_watchdog(struct drm_i915_gem_request *req) +{ + int ret; + struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_engine_cs *ring = ringbuf->ring; + + ret = intel_logical_ring_begin(req, 6); + if (ret) + return ret; + + /* + * i915_reg.h includes a warning to place a MI_NOOP + * before a MI_LOAD_REGISTER_IMM + */ + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_emit(ringbuf, MI_NOOP); + + intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); + intel_logical_ring_emit_reg(ringbuf, RING_CNTR(ring->mmio_base)); + + switch (ring->id) { + default: + WARN(1, "%s does not support watchdog timeout! " \ + "Defaulting to render engine.\n", ring->name); + case RCS: + intel_logical_ring_emit(ringbuf, GEN6_RCS_WATCHDOG_DISABLE); + break; + case VCS: + case VCS2: + intel_logical_ring_emit(ringbuf, GEN8_VCS_WATCHDOG_DISABLE); + break; + } + + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_advance(ringbuf); + + return 0; +} + /** * execlists_submission() - submit a batchbuffer for execution, Execlists style * @dev: DRM device. 
@@ -1124,6 +1198,12 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, int instp_mode; u32 instp_mask; int ret; + bool watchdog_running = false; + /* + * NB: Place-holder until watchdog timeout is enabled through DRM + * execbuf interface + */ + bool enable_watchdog = false; instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; instp_mask = I915_EXEC_CONSTANTS_MASK; @@ -1160,6 +1240,18 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, if (ret) return ret; + /* Start watchdog timer */ + if (enable_watchdog) { + if (!intel_ring_supports_watchdog(ring)) + return -EINVAL; + + ret = gen8_ring_start_watchdog(params->request); + if (ret) + return ret; + + watchdog_running = true; + } + if (ring == &dev_priv->ring[RCS] && instp_mode != dev_priv->relative_constants_mode) { ret = intel_logical_ring_begin(params->request, 4); @@ -1184,6 +1276,13 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); + /* Cancel watchdog timer */ + if (watchdog_running) { + ret = gen8_ring_stop_watchdog(params->request); + if (ret) + return ret; + } + i915_gem_execbuffer_move_to_active(vmas, params->request); i915_gem_execbuffer_retire_commands(params); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index dbace39..1a78105 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -31,6 +31,8 @@ struct intel_hw_status_page { struct drm_i915_gem_object *obj; }; +#define I915_WATCHDOG_ENABLE 0 + #define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base)) #define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val) @@ -536,6 +538,26 @@ int intel_ring_save(struct intel_engine_cs *ring, int intel_ring_restore(struct intel_engine_cs *ring, struct drm_i915_gem_request *req); +static inline bool intel_ring_supports_watchdog(struct intel_engine_cs *ring) 
+{ + bool ret = false; + + if (WARN_ON(!ring)) + goto exit; + + ret = ( ring->id == RCS || + ring->id == VCS || + ring->id == VCS2); + + if (!ret) + DRM_ERROR("%s does not support watchdog timeout!\n", ring->name); + +exit: + return ret; +} +int intel_ring_start_watchdog(struct intel_engine_cs *ring); +int intel_ring_stop_watchdog(struct intel_engine_cs *ring); + int __must_check intel_ring_idle(struct intel_engine_cs *ring); void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno); int intel_ring_flush_all_caches(struct drm_i915_gem_request *req); -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx