Re: drm/i915: Watchdog timeout: Ringbuffer command emission for gen8+

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 05/01/2019 02:39, Carlos Santa wrote:
From: Michel Thierry <michel.thierry@xxxxxxxxx>

Emit the required commands into the ring buffer for starting and
stopping the watchdog timer before/after batch buffer start during
batch buffer submission.

v2: Support watchdog threshold per context engine, merge lri commands,
and move watchdog commands emission to emit_bb_start. Request space of
combined start_watchdog, bb_start and stop_watchdog to avoid any error
after emitting bb_start.

v3: There were too many req->engine in emit_bb_start.
Use GEM_BUG_ON instead of returning a very late EINVAL in the remote
case of watchdog misprogramming; set correct LRI cmd size in
emit_stop_watchdog. (Chris)

v4: Rebase.
v5: use to_intel_context instead of ctx->engine.
v6: Rebase.

Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Antonio Argenziano <antonio.argenziano@xxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx>
Signed-off-by: Michel Thierry <michel.thierry@xxxxxxxxx>
Signed-off-by: Carlos Santa <carlos.santa@xxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_gem_context.h |  4 ++
  drivers/gpu/drm/i915/intel_lrc.c        | 80 ++++++++++++++++++++++++-
  drivers/gpu/drm/i915/intel_ringbuffer.h |  4 ++
  3 files changed, 86 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index f6d870b1f73e..62f4eb04985b 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -169,6 +169,10 @@ struct i915_gem_context {
  		u32 *lrc_reg_state;
  		u64 lrc_desc;
  		int pin_count;
+		/** watchdog_threshold: hw watchdog threshold value,
+		 * in clock counts
+		 */
+		u32 watchdog_threshold;
const struct intel_context_ops *ops;
  	} __engine[I915_NUM_ENGINES];
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e1dcdf545bee..0ea5a37c3357 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1872,12 +1872,33 @@ static int gen8_emit_bb_start(struct i915_request *rq,
  			      u64 offset, u32 len,
  			      const unsigned int flags)
  {
+	struct intel_engine_cs *engine = rq->engine;
  	u32 *cs;
+	u32 num_dwords;
+	bool watchdog_running = false;

Hm it's not really running but should be running, but ok. I'd just find something lie 'run_watchdog' or 'enable_watchdog' clearer.

- cs = intel_ring_begin(rq, 6);
+	/* bb_start only */
+	num_dwords = 6;
+
+	/* check if watchdog will be required */
+	if (to_intel_context(rq->gem_context, engine)->watchdog_threshold != 0) {
+		GEM_BUG_ON(!engine->emit_start_watchdog ||
+			   !engine->emit_stop_watchdog);
+
+		/* + start_watchdog (6) + stop_watchdog (4) */
+		num_dwords += 10;
+		watchdog_running = true;
+        }
+
+	cs = intel_ring_begin(rq, num_dwords);
  	if (IS_ERR(cs))
  		return PTR_ERR(cs);
+ if (watchdog_running) {
+		/* Start watchdog timer */
+		cs = engine->emit_start_watchdog(rq, cs);
+	}
+
  	/*
  	 * WaDisableCtxRestoreArbitration:bdw,chv
  	 *
@@ -1906,8 +1927,12 @@ static int gen8_emit_bb_start(struct i915_request *rq,
  	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
  	*cs++ = MI_NOOP;

It could be neater to avoid to many MI_NOOPs. I think if you'd make emit start/stop not emit it, and moved this line above to be last, a single MI_MOOP could be emitted conditionally if the total number of emitted (based on cs pointer) is odd.

- intel_ring_advance(rq, cs);
+	if (watchdog_running) {
+		/* Cancel watchdog timer */
+		cs = engine->emit_stop_watchdog(rq, cs);
+	}
+ intel_ring_advance(rq, cs);
  	return 0;
  }
@@ -2091,6 +2116,49 @@ static void gen8_watchdog_irq_handler(unsigned long data)
  	intel_uncore_forcewake_put(dev_priv, fw_domains);
  }
+static u32 *gen8_emit_start_watchdog(struct i915_request *rq, u32 *cs)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	struct i915_gem_context *ctx = rq->gem_context;
+	struct intel_context *ce = to_intel_context(ctx, engine);
+
+	/* XXX: no watchdog support in BCS engine */
+	GEM_BUG_ON(engine->id == BCS);
+
+	/*
+	 * watchdog register must never be programmed to zero. This would
+	 * cause the watchdog counter to exceed and not allow the engine to

Waht does exceed mean here?

+	 * go into IDLE state
+	 */
+	GEM_BUG_ON(ce->watchdog_threshold == 0);
+
+	/* Set counter period */
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
+	*cs++ = i915_mmio_reg_offset(RING_THRESH(engine->mmio_base));
+	*cs++ = ce->watchdog_threshold;
+	/* Start counter */
+	*cs++ = i915_mmio_reg_offset(RING_CNTR(engine->mmio_base));
+	*cs++ = GEN8_WATCHDOG_ENABLE;
+	*cs++ = MI_NOOP;
+
+	return cs;
+}
+
+static u32 *gen8_emit_stop_watchdog(struct i915_request *rq, u32 *cs)
+{
+	struct intel_engine_cs *engine = rq->engine;
+
+	/* XXX: no watchdog support in BCS engine */
+	GEM_BUG_ON(engine->id == BCS);

I think there will be enough of these checks (including the previous patch) to warrant engine->flags |= I915_ENGINE_SUPPORTS_WATCHDOG.

+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_CNTR(engine->mmio_base));
+	*cs++ = get_watchdog_disable(engine);

Since this helper now potentially runs on every bb end perhaps it is worth storing the magic value in the engine instead of running the conditionals every time.

+	*cs++ = MI_NOOP;
+
+	return cs;
+}
+
  /*
   * Reserve space for 2 NOOPs at the end of each request to be
   * used as a workaround for not being allowed to do lite
@@ -2366,6 +2434,8 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
  	engine->emit_flush = gen8_emit_flush_render;
  	engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs;
  	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz;
+	engine->emit_start_watchdog = gen8_emit_start_watchdog;
+	engine->emit_stop_watchdog = gen8_emit_stop_watchdog;

I don't see that these two need to be vfuncs - am I missing something?

ret = logical_ring_init(engine);
  	if (ret)
@@ -2392,6 +2462,12 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine)
  {
  	logical_ring_setup(engine);
+ /* BCS engine does not have a watchdog-expired irq */
+	if (engine->id != BCS) {
+		engine->emit_start_watchdog = gen8_emit_start_watchdog;
+		engine->emit_stop_watchdog = gen8_emit_stop_watchdog;
+	}
+
  	return logical_ring_init(engine);
  }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 6cb8b4280035..c7aa842a2db1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -469,6 +469,10 @@ struct intel_engine_cs {
  	int		(*init_context)(struct i915_request *rq);
int (*emit_flush)(struct i915_request *request, u32 mode);
+	u32 *		(*emit_start_watchdog)(struct i915_request *rq,
+					       u32 *cs);
+	u32 *		(*emit_stop_watchdog)(struct i915_request *rq,
+					      u32 *cs);
  #define EMIT_INVALIDATE	BIT(0)
  #define EMIT_FLUSH	BIT(1)
  #define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)


Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux