/*
* Record the position of the start of the request so that
@@ -908,7 +909,7 @@ void i915_request_add(struct i915_request *request)
* GPU processing the request, we never over-estimate the
* position of the ring's HEAD.
*/
- cs = intel_ring_begin(request, engine->emit_breadcrumb_sz);
+ cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_sz);
GEM_BUG_ON(IS_ERR(cs));
request->postfix = intel_ring_offset(request, cs);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 96c586d6ff4d..340d6216791c 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -344,6 +344,7 @@ static inline bool i915_request_started(const struct i915_request *rq)
if (i915_request_signaled(rq))
return true;
+ /* Remember: started but may have since been preempted! */
return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
}
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 007348b1b469..7bc9164733bc 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -132,6 +132,7 @@ int i915_timeline_init(struct drm_i915_private *i915,
timeline->i915 = i915;
timeline->name = name;
timeline->pin_count = 0;
+ timeline->has_initial_breadcrumb = !hwsp;
timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
if (!hwsp) {
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index ab736e2e5707..8caeb66d1cd5 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -48,6 +48,8 @@ struct i915_timeline {
struct i915_vma *hwsp_ggtt;
u32 hwsp_offset;
+ bool has_initial_breadcrumb;
+
/**
* List of breadcrumbs associated with GPU requests currently
* outstanding.
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index e532b4b27239..2a4c547240a1 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1239,7 +1239,9 @@ static void print_request(struct drm_printer *m,
drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n",
prefix,
rq->global_seqno,
- i915_request_completed(rq) ? "!" : "",
+ i915_request_completed(rq) ? "!" :
+ i915_request_started(rq) ? "*" :
+ "",
rq->fence.context, rq->fence.seqno,
buf,
jiffies_to_msecs(jiffies - rq->emitted_jiffies),
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1bf178ca3e00..0a2d53f19625 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -649,7 +649,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* WaIdleLiteRestore:bdw,skl
* Apply the wa NOOPs to prevent
* ring:HEAD == rq:TAIL as we resubmit the
- * request. See gen8_emit_breadcrumb() for
+ * request. See gen8_emit_fini_breadcrumb() for
* where we prepare the padding after the
* end of the request.
*/
@@ -1294,6 +1294,34 @@ execlists_context_pin(struct intel_engine_cs *engine,
return __execlists_context_pin(engine, ctx, ce);
}
+static int gen8_emit_init_breadcrumb(struct i915_request *rq)
+{
+ u32 *cs;
+
+ GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb);
+
+ cs = intel_ring_begin(rq, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /*
+ * Check if we have been preempted before we even get started.
+ *
+ * After this point i915_request_started() reports true, even if
+ * we get preempted and so are no longer running.
+ */
+ *cs++ = MI_ARB_CHECK;
+ *cs++ = MI_NOOP;
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = i915_timeline_seqno_address(rq->timeline);
+ *cs++ = 0;
+ *cs++ = rq->fence.seqno - 1;
+
+ intel_ring_advance(rq, cs);
+ return 0;
+}
+
static int emit_pdps(struct i915_request *rq)
{
const struct intel_engine_cs * const engine = rq->engine;
@@ -2049,7 +2077,7 @@ static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
request->wa_tail = intel_ring_offset(request, cs);
}
-static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
+static void gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
{
/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
@@ -2070,9 +2098,9 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
gen8_emit_wa_tail(request, cs);
}
-static const int gen8_emit_breadcrumb_sz = 10 + WA_TAIL_DWORDS;
+static const int gen8_emit_fini_breadcrumb_sz = 10 + WA_TAIL_DWORDS;
-static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
+static void gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
@@ -2096,7 +2124,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
gen8_emit_wa_tail(request, cs);
}
-static const int gen8_emit_breadcrumb_rcs_sz = 14 + WA_TAIL_DWORDS;
+static const int gen8_emit_fini_breadcrumb_rcs_sz = 14 + WA_TAIL_DWORDS;
static int gen8_init_rcs_context(struct i915_request *rq)
{
@@ -2188,8 +2216,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->request_alloc = execlists_request_alloc;
engine->emit_flush = gen8_emit_flush;
- engine->emit_breadcrumb = gen8_emit_breadcrumb;
- engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
+ engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
+ engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
+ engine->emit_fini_breadcrumb_sz = gen8_emit_fini_breadcrumb_sz;
engine->set_default_submission = intel_execlists_set_default_submission;
@@ -2302,8 +2331,8 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
/* Override some for render ring. */
engine->init_context = gen8_init_rcs_context;
engine->emit_flush = gen8_emit_flush_render;
- engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs;
- engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz;
+ engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
+ engine->emit_fini_breadcrumb_sz = gen8_emit_fini_breadcrumb_rcs_sz;
ret = logical_ring_init(engine);
if (ret)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 751bd4e7da42..f6b30eb46263 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1594,6 +1594,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
err = PTR_ERR(timeline);
goto err;
}
+ GEM_BUG_ON(timeline->has_initial_breadcrumb);
ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
i915_timeline_put(timeline);
@@ -1947,6 +1948,7 @@ static int ring_request_alloc(struct i915_request *request)
int ret;
GEM_BUG_ON(!request->hw_context->pin_count);
+ GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
/*
* Flush enough space to reduce the likelihood of waiting after
@@ -2283,11 +2285,16 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
engine->context_pin = intel_ring_context_pin;
engine->request_alloc = ring_request_alloc;
- engine->emit_breadcrumb = i9xx_emit_breadcrumb;
- engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
+ /*
+ * Using a global execution timeline; the previous final breadcrumb is
+ * equivalent to our next initial bread so we can elide
+ * engine->emit_init_breadcrumb().
+ */
+ engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
+ engine->emit_fini_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
if (IS_GEN(dev_priv, 5)) {
- engine->emit_breadcrumb = gen5_emit_breadcrumb;
- engine->emit_breadcrumb_sz = gen5_emit_breadcrumb_sz;
+ engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
+ engine->emit_fini_breadcrumb_sz = gen5_emit_breadcrumb_sz;
}
engine->set_default_submission = i9xx_set_default_submission;
@@ -2317,13 +2324,13 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
if (INTEL_GEN(dev_priv) >= 7) {
engine->init_context = intel_rcs_ctx_init;
engine->emit_flush = gen7_render_ring_flush;
- engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb;
- engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz;
+ engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
+ engine->emit_fini_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz;
} else if (IS_GEN(dev_priv, 6)) {
engine->init_context = intel_rcs_ctx_init;
engine->emit_flush = gen6_render_ring_flush;
- engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb;
- engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz;
+ engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
+ engine->emit_fini_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz;
} else if (IS_GEN(dev_priv, 5)) {
engine->emit_flush = gen4_render_ring_flush;
} else {
@@ -2360,11 +2367,11 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
if (IS_GEN(dev_priv, 6)) {
- engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
- engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
+ engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
+ engine->emit_fini_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
} else {
- engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
- engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
+ engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+ engine->emit_fini_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
}
} else {
engine->emit_flush = bsd_ring_flush;
@@ -2389,11 +2396,11 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
if (IS_GEN(dev_priv, 6)) {
- engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
- engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
+ engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
+ engine->emit_fini_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
} else {
- engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
- engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
+ engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+ engine->emit_fini_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
}
return intel_init_ring_buffer(engine);
@@ -2412,8 +2419,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
engine->irq_enable = hsw_vebox_irq_enable;
engine->irq_disable = hsw_vebox_irq_disable;
- engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
- engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
+ engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+ engine->emit_fini_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
return intel_init_ring_buffer(engine);
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a792bacf2930..d3d4f3667afb 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -463,8 +463,10 @@ struct intel_engine_cs {
unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
- void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
- int emit_breadcrumb_sz;
+ int (*emit_init_breadcrumb)(struct i915_request *rq);
+ void (*emit_fini_breadcrumb)(struct i915_request *rq,
+ u32 *cs);
+ unsigned int emit_fini_breadcrumb_sz;
/* Pass the request to the hardware queue (e.g. directly into
* the legacy ringbuffer or to the end of an execlist).
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index c0a408828415..2515cffb4490 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -227,7 +227,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
engine->base.context_pin = mock_context_pin;
engine->base.request_alloc = mock_request_alloc;
engine->base.emit_flush = mock_emit_flush;
- engine->base.emit_breadcrumb = mock_emit_breadcrumb;
+ engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb;
engine->base.submit_request = mock_submit_request;
if (i915_timeline_init(i915,