The invalidate and flush bitfields are now only used as booleans, and we may want to extend the range of actions in future, so consolidate those parameters into a single action bitmask.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c |   2 +-
 drivers/gpu/drm/i915/i915_trace.h       |  11 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 284 +++++++++++++++++---------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |   7 +-
 4 files changed, 162 insertions(+), 142 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ff47145..540a9c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -360,7 +360,7 @@ mi_set_context(struct intel_ring_buffer *ring,
 	 * itlb_before_ctx_switch.
 	 */
 	if (IS_GEN6(ring->dev) && ring->itlb_before_ctx_switch) {
-		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, 0);
+		ret = intel_ring_invalidate_all_caches(ring);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 3db4a68..ce392eb 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -252,26 +252,23 @@ TRACE_EVENT(i915_gem_ring_dispatch,
 );

 TRACE_EVENT(i915_gem_ring_flush,
-	    TP_PROTO(struct intel_ring_buffer *ring, u32 invalidate, u32 flush),
-	    TP_ARGS(ring, invalidate, flush),
+	    TP_PROTO(struct intel_ring_buffer *ring, u32 flush),
+	    TP_ARGS(ring, flush),

 	    TP_STRUCT__entry(
 			     __field(u32, dev)
 			     __field(u32, ring)
-			     __field(u32, invalidate)
 			     __field(u32, flush)
 			     ),

 	    TP_fast_assign(
 			   __entry->dev = ring->dev->primary->index;
 			   __entry->ring = ring->id;
-			   __entry->invalidate = invalidate;
 			   __entry->flush = flush;
 			   ),

-	    TP_printk("dev=%u, ring=%x, invalidate=%04x, flush=%04x",
-		      __entry->dev, __entry->ring,
-		      __entry->invalidate, __entry->flush)
+	    TP_printk("dev=%u, ring=%x, flush=%04x",
+		      __entry->dev, __entry->ring, __entry->flush)
 );

 DECLARE_EVENT_CLASS(i915_gem_request,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e51ab55..601e1eb 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -52,38 +52,32 @@ static inline int ring_space(struct intel_ring_buffer *ring)
 }

 static int
-gen2_render_ring_flush(struct intel_ring_buffer *ring,
-		       u32 invalidate_domains,
-		       u32 flush_domains)
+gen2_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
-	u32 cmd;
 	int ret;

-	cmd = MI_FLUSH;
-	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
-		cmd |= MI_NO_WRITE_FLUSH;
+	if (action & (RING_INVALIDATE | RING_FLUSH)) {
+		u32 cmd = MI_FLUSH;

-	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
-		cmd |= MI_READ_FLUSH;
+		if (action & RING_INVALIDATE)
+			cmd |= MI_READ_FLUSH;

-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+		ret = intel_ring_begin(ring, 2);
+		if (ret)
+			return ret;

-	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+		intel_ring_emit(ring, cmd);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}

 	return 0;
 }

 static int
-gen4_render_ring_flush(struct intel_ring_buffer *ring,
-		       u32 invalidate_domains,
-		       u32 flush_domains)
+gen4_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
 	struct drm_device *dev = ring->dev;
-	u32 cmd;
 	int ret;

 	/*
@@ -114,23 +108,23 @@ gen4_render_ring_flush(struct intel_ring_buffer *ring,
 	 * are flushed at any MI_FLUSH.
 	 */
-	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
-	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
-		cmd &= ~MI_NO_WRITE_FLUSH;
-	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
-		cmd |= MI_EXE_FLUSH;
+	if (action & (RING_FLUSH | RING_INVALIDATE)) {
+		u32 cmd = MI_FLUSH;

-	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
-	    (IS_G4X(dev) || IS_GEN5(dev)))
-		cmd |= MI_INVALIDATE_ISP;
+		if (action & RING_INVALIDATE) {
+			cmd |= MI_EXE_FLUSH;
+			if (IS_G4X(dev) || IS_GEN5(dev))
+				cmd |= MI_INVALIDATE_ISP;
+		}

-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+		ret = intel_ring_begin(ring, 2);
+		if (ret)
+			return ret;

-	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+		intel_ring_emit(ring, cmd);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}

 	return 0;
 }

@@ -179,7 +173,6 @@ intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
 	u32 scratch_addr = pc->gtt_offset + 128;
 	int ret;

-
 	ret = intel_ring_begin(ring, 6);
 	if (ret)
 		return ret;
@@ -209,24 +202,18 @@ intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
 }

 static int
-gen6_render_ring_flush(struct intel_ring_buffer *ring,
-		       u32 invalidate_domains, u32 flush_domains)
+gen6_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
 	u32 flags = 0;
 	struct pipe_control *pc = ring->private;
 	u32 scratch_addr = pc->gtt_offset + 128;
 	int ret;

-	/* Force SNB workarounds for PIPE_CONTROL flushes */
-	ret = intel_emit_post_sync_nonzero_flush(ring);
-	if (ret)
-		return ret;
-
 	/* Just flush everything.  Experiments have shown that reducing the
 	 * number of bits based on the write domains has little performance
 	 * impact.
 	 */
-	if (flush_domains) {
+	if (action & RING_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 		/*
@@ -235,7 +222,7 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 		 */
 		flags |= PIPE_CONTROL_CS_STALL;
 	}
-	if (invalidate_domains) {
+	if (action & RING_INVALIDATE) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -248,15 +235,22 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	}

-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (flags) {
+		/* Force SNB workarounds for PIPE_CONTROL flushes */
+		ret = intel_emit_post_sync_nonzero_flush(ring);
+		if (ret)
+			return ret;

-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
-	intel_ring_emit(ring, flags);
-	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, 0);
-	intel_ring_advance(ring);
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
+
+		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+		intel_ring_emit(ring, flags);
+		intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
+		intel_ring_emit(ring, 0);
+		intel_ring_advance(ring);
+	}

 	return 0;
 }
@@ -302,33 +296,22 @@ static int gen7_ring_fbc_flush(struct intel_ring_buffer *ring, u32 value)
 }

 static int
-gen7_render_ring_flush(struct intel_ring_buffer *ring,
-		       u32 invalidate_domains, u32 flush_domains)
+gen7_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
 	u32 flags = 0;
 	struct pipe_control *pc = ring->private;
 	u32 scratch_addr = pc->gtt_offset + 128;
 	int ret;

-	/*
-	 * Ensure that any following seqno writes only happen when the render
-	 * cache is indeed flushed.
-	 *
-	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
-	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
-	 * don't try to be clever and just set it unconditionally.
-	 */
-	flags |= PIPE_CONTROL_CS_STALL;
-
 	/* Just flush everything.  Experiments have shown that reducing the
 	 * number of bits based on the write domains has little performance
 	 * impact.
 	 */
-	if (flush_domains) {
+	if (action & RING_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 	}
-	if (invalidate_domains) {
+	if (action & RING_INVALIDATE) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -347,17 +330,30 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
 		gen7_render_ring_cs_stall_wa(ring);
 	}

-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (flags) {
+		/*
+		 * Ensure that any following seqno writes only happen when the
+		 * render cache is indeed flushed.
+		 *
+		 * Workaround: 4th PIPE_CONTROL command (except the ones with
+		 * only read-cache invalidate bits set) must have the CS_STALL
+		 * bit set. We don't try to be clever and just set it
+		 * unconditionally.
+		 */
+		flags |= PIPE_CONTROL_CS_STALL;

-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
-	intel_ring_emit(ring, flags);
-	intel_ring_emit(ring, scratch_addr);
-	intel_ring_emit(ring, 0);
-	intel_ring_advance(ring);
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;

-	if (flush_domains)
+		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+		intel_ring_emit(ring, flags);
+		intel_ring_emit(ring, scratch_addr);
+		intel_ring_emit(ring, 0);
+		intel_ring_advance(ring);
+	}
+
+	if (action & RING_FLUSH)
 		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);

 	return 0;
@@ -956,19 +952,19 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
 }

 static int
-bsd_ring_flush(struct intel_ring_buffer *ring,
-	       u32 invalidate_domains,
-	       u32 flush_domains)
+bsd_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
 	int ret;

-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+	if (action & (RING_FLUSH | RING_INVALIDATE)) {
+		ret = intel_ring_begin(ring, 2);
+		if (ret)
+			return ret;

-	intel_ring_emit(ring, MI_FLUSH);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+		intel_ring_emit(ring, MI_FLUSH);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}

 	return 0;
 }

@@ -1636,31 +1632,34 @@ static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
 		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
 }

-static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring,
-			       u32 invalidate, u32 flush)
+static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
-	uint32_t cmd;
 	int ret;

-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (action & (RING_FLUSH | RING_INVALIDATE)) {
+		u32 cmd = MI_FLUSH_DW;
+
+		/*
+		 * Bspec vol 1c.5 - video engine command streamer:
+		 * "If ENABLED, all TLBs will be invalidated once the flush
+		 * operation is complete. This bit is only valid when the
+		 * Post-Sync Operation field is a value of 1h or 3h."
+		 */
+		if (action & RING_INVALIDATE)
+			cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
+				MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
+
+		intel_ring_emit(ring, cmd);
+		intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}

-	cmd = MI_FLUSH_DW;
-	/*
-	 * Bspec vol 1c.5 - video engine command streamer:
-	 * "If ENABLED, all TLBs will be invalidated once the flush
-	 * operation is complete. This bit is only valid when the
-	 * Post-Sync Operation field is a value of 1h or 3h."
-	 */
-	if (invalidate & I915_GEM_GPU_DOMAINS)
-		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
-			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
 	return 0;
 }

@@ -1708,34 +1707,37 @@ gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,

 /* Blitter support (SandyBridge+) */

-static int gen6_ring_flush(struct intel_ring_buffer *ring,
-			   u32 invalidate, u32 flush)
+static int gen6_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
 	struct drm_device *dev = ring->dev;
 	uint32_t cmd;
 	int ret;

-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (action & (RING_FLUSH | RING_INVALIDATE)) {
+		cmd = MI_FLUSH_DW;

-	cmd = MI_FLUSH_DW;
-	/*
-	 * Bspec vol 1c.3 - blitter engine command streamer:
-	 * "If ENABLED, all TLBs will be invalidated once the flush
-	 * operation is complete. This bit is only valid when the
-	 * Post-Sync Operation field is a value of 1h or 3h."
-	 */
-	if (invalidate & I915_GEM_DOMAIN_RENDER)
-		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
-			MI_FLUSH_DW_OP_STOREDW;
-	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+		/*
+		 * Bspec vol 1c.3 - blitter engine command streamer:
+		 * "If ENABLED, all TLBs will be invalidated once the flush
+		 * operation is complete. This bit is only valid when the
+		 * Post-Sync Operation field is a value of 1h or 3h."
+		 */
+		if (action & RING_INVALIDATE)
+			cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+				MI_FLUSH_DW_OP_STOREDW;

-	if (IS_GEN7(dev) && flush)
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
+
+		intel_ring_emit(ring, cmd);
+		intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}
+
+	if (IS_GEN7(dev) && action & RING_FLUSH)
 		return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);

 	return 0;
@@ -2027,11 +2029,11 @@ intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
 	if (!ring->gpu_caches_dirty)
 		return 0;

-	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
+	ret = ring->flush(ring, RING_FLUSH);
 	if (ret)
 		return ret;

-	trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
+	trace_i915_gem_ring_flush(ring, RING_FLUSH);

 	ring->gpu_caches_dirty = false;
 	return 0;
@@ -2040,18 +2042,36 @@ intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
 int
 intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
 {
-	uint32_t flush_domains;
+	u32 action;
+	int ret;
+
+	action = RING_INVALIDATE;
+	if (ring->gpu_caches_dirty)
+		action |= RING_FLUSH;
+
+	ret = ring->flush(ring, action);
+	if (ret)
+		return ret;
+
+	trace_i915_gem_ring_flush(ring, action);
+
+	ring->gpu_caches_dirty = false;
+	return 0;
+}
+
+int
+intel_ring_flush_internal(struct intel_ring_buffer *ring, u32 action)
+{
 	int ret;

-	flush_domains = 0;
 	if (ring->gpu_caches_dirty)
-		flush_domains = I915_GEM_GPU_DOMAINS;
+		action |= RING_FLUSH;

-	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+	ret = ring->flush(ring, action);
 	if (ret)
 		return ret;

-	trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+	trace_i915_gem_ring_flush(ring, action);

 	ring->gpu_caches_dirty = false;
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 799f04c..5066b3b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -93,8 +93,10 @@ struct intel_ring_buffer {
 	void		(*write_tail)(struct intel_ring_buffer *ring,
 				      u32 value);
 	int __must_check (*flush)(struct intel_ring_buffer *ring,
-				  u32 invalidate_domains,
-				  u32 flush_domains);
+				  u32 action);
+#define RING_FLUSH 0x1
+#define RING_INVALIDATE 0x2
+
 	int		(*add_request)(struct intel_ring_buffer *ring);
 	/* Some chipsets are not quite as coherent as advertised and need
 	 * an expensive kick to force a true read of the up-to-date seqno.
@@ -240,6 +242,7 @@ int __must_check intel_ring_idle(struct intel_ring_buffer *ring);
 void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno);
 int intel_ring_flush_all_caches(struct intel_ring_buffer *ring);
 int intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring);
+int intel_ring_flush_internal(struct intel_ring_buffer *ring, u32 action);

 int intel_init_render_ring_buffer(struct drm_device *dev);
 int intel_init_bsd_ring_buffer(struct drm_device *dev);
-- 
1.8.3.1
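
For reference, the consolidated interface boils down to a small pattern that can be exercised outside the driver. Below is a minimal userspace sketch, not driver code: mock_ring, mock_flush() and the printf "tracepoint" are hypothetical stand-ins for struct intel_ring_buffer, ring->flush() and trace_i915_gem_ring_flush(); only the RING_FLUSH/RING_INVALIDATE semantics follow the patch, including the rule that an invalidate picks up a write-back flush whenever gpu_caches_dirty is set.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same action bits as the patch adds to intel_ringbuffer.h. */
#define RING_FLUSH	0x1
#define RING_INVALIDATE	0x2

struct mock_ring {
	bool gpu_caches_dirty;
	int (*flush)(struct mock_ring *ring, uint32_t action);
};

static int mock_flush(struct mock_ring *ring, uint32_t action)
{
	(void)ring; /* unused in the mock */

	/* Every backend can now short-circuit the no-op request. */
	if ((action & (RING_FLUSH | RING_INVALIDATE)) == 0)
		return 0;

	printf("emit: flush=%d invalidate=%d\n",
	       !!(action & RING_FLUSH), !!(action & RING_INVALIDATE));
	return 0;
}

/* Mirrors intel_ring_invalidate_all_caches(): dirty caches upgrade the
 * invalidate into a combined flush + invalidate. */
static int invalidate_all_caches(struct mock_ring *ring)
{
	uint32_t action = RING_INVALIDATE;
	int ret;

	if (ring->gpu_caches_dirty)
		action |= RING_FLUSH;

	ret = ring->flush(ring, action);
	if (ret)
		return ret;

	ring->gpu_caches_dirty = false;
	return 0;
}

int main(void)
{
	struct mock_ring ring = {
		.gpu_caches_dirty = true,
		.flush = mock_flush,
	};

	invalidate_all_caches(&ring);	/* dirty: flush=1 invalidate=1 */
	invalidate_all_caches(&ring);	/* clean: flush=0 invalidate=1 */
	return 0;
}

One consequence of folding the two parameters into one mask is visible even in this sketch: the "nothing to do" case is decided in one place from a single word, rather than by each backend comparing two domain masks against zero.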