In preparation for removing the manual EMIT_FLUSH prior to emitting the breadcrumb implement the flush inline with writing the breadcrumb for ringbuffer emission. With a combined flush+breadcrumb, we can use a single operation to both flush and after the flush is complete (post-sync) write the breadcrumb. This gives us a strongly ordered operation that should be sufficient to serialise the write before we emit the interrupt; and therefore we may take the opportunity to remove the irq_seqno_barrier w/a for gen6+. Although using the PIPECONTROL to write the breadcrumb is slower than MI_STORE_DWORD_IMM, by combining the operations into one and removing the extra flush (next patch) it is faster For gen2-5, we simply combine the MI_FLUSH into the breadcrumb emission, though maybe we could find a solution here to the seqno-vs-interrupt issue on Ironlake by mixing up the flush? The answer is no, adding an MI_FLUSH before the interrupt is insufficient. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/intel_ringbuffer.c | 97 +++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d5a9edbcf1be..eb3701fdeab6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -217,7 +217,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) * really our business. That leaves only stall at scoreboard. */ static int -intel_emit_post_sync_nonzero_flush(struct i915_request *rq) +gen6_emit_post_sync_nonzero_flush(struct i915_request *rq) { u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; u32 *cs; @@ -257,7 +257,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode) int ret; /* Force SNB workarounds for PIPE_CONTROL flushes */ - ret = intel_emit_post_sync_nonzero_flush(rq); + ret = gen6_emit_post_sync_nonzero_flush(rq); if (ret) return ret; @@ -300,6 +300,37 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode) return 0; } +static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +{ + /* First we do the gen6_emit_post_sync_nonzero_flush w/a */ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = 0; + *cs++ = 0; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_QW_WRITE; + *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + + /* Finally we can flush and with it emit the breadcrumb */ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_CS_STALL); + *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = rq->global_seqno; + + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); +} +static const int gen6_rcs_emit_breadcrumb_sz = 14; + static int gen7_render_ring_cs_stall_wa(struct i915_request *rq) { @@ -379,6 +410,39 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode) return 0; } +static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +{ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL); + *cs++ = intel_hws_seqno_address(rq->engine); + *cs++ = rq->global_seqno; + + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); +} +static const int gen7_rcs_emit_breadcrumb_sz = 6; + +static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +{ + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; + *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->global_seqno; + *cs++ = MI_USER_INTERRUPT; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); +} +static const int gen6_xcs_emit_breadcrumb_sz = 4; + static void set_hwstam(struct intel_engine_cs *engine, u32 mask) { /* @@ -777,6 +841,9 @@ static void i9xx_submit_request(struct i915_request *request) static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; *cs++ = rq->global_seqno; @@ -786,7 +853,7 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) assert_ring_tail_valid(rq->ring, rq->tail); } -static const int i9xx_emit_breadcrumb_sz = 4; +static const int i9xx_emit_breadcrumb_sz = 6; static void gen5_seqno_barrier(struct intel_engine_cs *engine) @@ -2122,11 +2189,16 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; - if (INTEL_GEN(dev_priv) >= 6) { + if (INTEL_GEN(dev_priv) >= 7) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; - if (IS_GEN(dev_priv, 6)) - engine->emit_flush = gen6_render_ring_flush; + engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb; + engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz; + } else if (IS_GEN(dev_priv, 6)) { + engine->init_context = intel_rcs_ctx_init; + engine->emit_flush = gen6_render_ring_flush; + engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb; + engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz; } else if (IS_GEN(dev_priv, 5)) { engine->emit_flush = gen4_render_ring_flush; } else { @@ -2161,6 +2233,9 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) engine->set_default_submission = gen6_bsd_set_default_submission; engine->emit_flush = gen6_bsd_ring_flush; engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; + + engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; + engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; } else { engine->emit_flush = bsd_ring_flush; if (IS_GEN(dev_priv, 5)) @@ -2176,11 +2251,16 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + GEM_BUG_ON(INTEL_GEN(dev_priv) < 6); + intel_ring_default_vfuncs(dev_priv, engine); engine->emit_flush = gen6_ring_flush; engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; + engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; + engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; + return intel_init_ring_buffer(engine); } @@ -2188,6 +2268,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + GEM_BUG_ON(INTEL_GEN(dev_priv) < 7); + intel_ring_default_vfuncs(dev_priv, engine); engine->emit_flush = gen6_ring_flush; @@ -2195,5 +2277,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) engine->irq_enable = hsw_vebox_irq_enable; engine->irq_disable = hsw_vebox_irq_disable; + engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; + engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; + return intel_init_ring_buffer(engine); } -- 2.20.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx