From: Oscar Mateo <oscar.mateo@xxxxxxxxx> Notice that the BSD invalidate bit is no longer present in GEN8, so we can consolidate the blt and bsd ring flushes into one. Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx> --- drivers/gpu/drm/i915/intel_lrc.c | 80 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 7 --- drivers/gpu/drm/i915/intel_ringbuffer.h | 11 +++++ 3 files changed, 91 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 3debe8b..3d7fcd6 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -343,6 +343,81 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return ret; } +static int gen8_emit_flush(struct intel_engine_cs *ring, + struct intel_context *ctx, + u32 invalidate_domains, + u32 unused) +{ + struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx); + uint32_t cmd; + int ret; + + ret = intel_logical_ring_begin(ring, ctx, 4); + if (ret) + return ret; + + cmd = MI_FLUSH_DW + 1; + + /* + * Bspec vol 1c.3 - blitter engine command streamer: + * "If ENABLED, all TLBs will be invalidated once the flush + * operation is complete. This bit is only valid when the + * Post-Sync Operation field is a value of 1h or 3h." + */ + if (invalidate_domains & I915_GEM_DOMAIN_RENDER) + cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX | + MI_FLUSH_DW_OP_STOREDW; + intel_logical_ring_emit(ringbuf, cmd); + intel_logical_ring_emit(ringbuf, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); + intel_logical_ring_emit(ringbuf, 0); /* upper addr */ + intel_logical_ring_emit(ringbuf, 0); /* value */ + intel_logical_ring_advance(ringbuf); + + return 0; +} + +static int gen8_emit_flush_render(struct intel_engine_cs *ring, + struct intel_context *ctx, + u32 invalidate_domains, + u32 flush_domains) +{ + struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx); + u32 flags = 0; + u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; + int ret; + + flags |= PIPE_CONTROL_CS_STALL; + + if (flush_domains) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + } + if (invalidate_domains) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + } + + ret = intel_logical_ring_begin(ring, ctx, 6); + if (ret) + return ret; + + intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); + intel_logical_ring_emit(ringbuf, flags); + intel_logical_ring_emit(ringbuf, scratch_addr); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_advance(ringbuf); + + return 0; +} + static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency) { return intel_read_status_page(ring, I915_GEM_HWS_INDEX); @@ -491,6 +566,7 @@ static int logical_render_ring_init(struct drm_device *dev) ring->set_seqno = gen8_set_seqno; ring->submit_ctx = gen8_submit_ctx; ring->emit_request = gen8_emit_request_render; + ring->emit_flush = gen8_emit_flush_render; return logical_ring_init(dev, ring); } @@ -511,6 +587,7 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->set_seqno = gen8_set_seqno; ring->submit_ctx = gen8_submit_ctx; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } @@ -531,6 +608,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->set_seqno = gen8_set_seqno; ring->submit_ctx = gen8_submit_ctx; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } @@ -551,6 +629,7 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->set_seqno = gen8_set_seqno; ring->submit_ctx = gen8_submit_ctx; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } @@ -571,6 +650,7 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->set_seqno = gen8_set_seqno; ring->submit_ctx = gen8_submit_ctx; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 137ee9a..a128f6f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -33,13 +33,6 @@ #include "i915_trace.h" #include "intel_drv.h" -/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, - * but keeps the logic simple. Indeed, the whole purpose of this macro is just - * to give some inclination as to some of the magic values used in the various - * workarounds! - */ -#define CACHELINE_BYTES 64 - bool intel_ring_initialized(struct intel_engine_cs *ring) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index d8ded14..527db2a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,13 @@ #define I915_CMD_HASH_ORDER 9 +/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, + * but keeps the logic simple. Indeed, the whole purpose of this macro is just + * to give some inclination as to some of the magic values used in the various + * workarounds! + */ +#define CACHELINE_BYTES 64 + /* * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use" * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use" @@ -153,6 +160,10 @@ struct intel_engine_cs { struct intel_context *ctx, u32 value); int (*emit_request)(struct intel_engine_cs *ring, struct intel_context *ctx); + int __must_check (*emit_flush)(struct intel_engine_cs *ring, + struct intel_context *ctx, + u32 invalidate_domains, + u32 flush_domains); /** * List of objects currently involved in rendering from the -- 1.9.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx