On Fri, Jun 13, 2014 at 08:37:46AM -0700, oscar.mateo@xxxxxxxxx wrote: > From: Oscar Mateo <oscar.mateo@xxxxxxxxx> > > Notice that the BSD invalidate bit is no longer present in GEN8, so Hmm. As far as I can tell, it is still present for VCS on gen8. As to whether we need to set it, I don't know. > we can consolidate the blt and bsd ring flushes into one. > > Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx> > --- > drivers/gpu/drm/i915/intel_lrc.c | 80 +++++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_ringbuffer.c | 7 --- > drivers/gpu/drm/i915/intel_ringbuffer.h | 11 +++++ > 3 files changed, 91 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 3debe8b..3d7fcd6 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -343,6 +343,81 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) > return ret; > } > > +static int gen8_emit_flush(struct intel_engine_cs *ring, > + struct intel_context *ctx, > + u32 invalidate_domains, > + u32 unused) > +{ > + struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx); > + uint32_t cmd; > + int ret; > + > + ret = intel_logical_ring_begin(ring, ctx, 4); > + if (ret) > + return ret; > + > + cmd = MI_FLUSH_DW + 1; > + > + /* > + * Bspec vol 1c.3 - blitter engine command streamer: > + * "If ENABLED, all TLBs will be invalidated once the flush > + * operation is complete. This bit is only valid when the > + * Post-Sync Operation field is a value of 1h or 3h." > + */ > + if (invalidate_domains & I915_GEM_DOMAIN_RENDER) > + cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX | > + MI_FLUSH_DW_OP_STOREDW; > + intel_logical_ring_emit(ringbuf, cmd); > + intel_logical_ring_emit(ringbuf, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); > + intel_logical_ring_emit(ringbuf, 0); /* upper addr */ > + intel_logical_ring_emit(ringbuf, 0); /* value */ > + intel_logical_ring_advance(ringbuf); > + > + return 0; > +} > + > +static int gen8_emit_flush_render(struct intel_engine_cs *ring, > + struct intel_context *ctx, > + u32 invalidate_domains, > + u32 flush_domains) > +{ > + struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx); > + u32 flags = 0; > + u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; > + int ret; > + > + flags |= PIPE_CONTROL_CS_STALL; > + > + if (flush_domains) { > + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; > + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; > + } > + if (invalidate_domains) { > + flags |= PIPE_CONTROL_TLB_INVALIDATE; > + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; > + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; > + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; > + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; > + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; > + flags |= PIPE_CONTROL_QW_WRITE; > + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; > + } > + > + ret = intel_logical_ring_begin(ring, ctx, 6); > + if (ret) > + return ret; > + > + intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); > + intel_logical_ring_emit(ringbuf, flags); > + intel_logical_ring_emit(ringbuf, scratch_addr); > + intel_logical_ring_emit(ringbuf, 0); > + intel_logical_ring_emit(ringbuf, 0); > + intel_logical_ring_emit(ringbuf, 0); > + intel_logical_ring_advance(ringbuf); > + > + return 0; > +} > + > static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency) > { > return intel_read_status_page(ring, I915_GEM_HWS_INDEX); > @@ -491,6 +566,7 @@ static int logical_render_ring_init(struct drm_device *dev) > ring->set_seqno = gen8_set_seqno; > ring->submit_ctx = gen8_submit_ctx; > ring->emit_request = gen8_emit_request_render; > + ring->emit_flush = gen8_emit_flush_render; > > return logical_ring_init(dev, ring); > } > @@ -511,6 +587,7 @@ static int logical_bsd_ring_init(struct drm_device *dev) > ring->set_seqno = gen8_set_seqno; > ring->submit_ctx = gen8_submit_ctx; > ring->emit_request = gen8_emit_request; > + ring->emit_flush = gen8_emit_flush; > > return logical_ring_init(dev, ring); > } > @@ -531,6 +608,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev) > ring->set_seqno = gen8_set_seqno; > ring->submit_ctx = gen8_submit_ctx; > ring->emit_request = gen8_emit_request; > + ring->emit_flush = gen8_emit_flush; > > return logical_ring_init(dev, ring); > } > @@ -551,6 +629,7 @@ static int logical_blt_ring_init(struct drm_device *dev) > ring->set_seqno = gen8_set_seqno; > ring->submit_ctx = gen8_submit_ctx; > ring->emit_request = gen8_emit_request; > + ring->emit_flush = gen8_emit_flush; > > return logical_ring_init(dev, ring); > } > @@ -571,6 +650,7 @@ static int logical_vebox_ring_init(struct drm_device *dev) > ring->set_seqno = gen8_set_seqno; > ring->submit_ctx = gen8_submit_ctx; > ring->emit_request = gen8_emit_request; > + ring->emit_flush = gen8_emit_flush; > > return logical_ring_init(dev, ring); > } > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 137ee9a..a128f6f 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -33,13 +33,6 @@ > #include "i915_trace.h" > #include "intel_drv.h" > > -/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, > - * but keeps the logic simple. Indeed, the whole purpose of this macro is just > - * to give some inclination as to some of the magic values used in the various > - * workarounds! > - */ > -#define CACHELINE_BYTES 64 > - > bool > intel_ring_initialized(struct intel_engine_cs *ring) > { > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index d8ded14..527db2a 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -5,6 +5,13 @@ > > #define I915_CMD_HASH_ORDER 9 > > +/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, > + * but keeps the logic simple. Indeed, the whole purpose of this macro is just > + * to give some inclination as to some of the magic values used in the various > + * workarounds! > + */ > +#define CACHELINE_BYTES 64 > + > /* > * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use" > * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use" > @@ -153,6 +160,10 @@ struct intel_engine_cs { > struct intel_context *ctx, u32 value); > int (*emit_request)(struct intel_engine_cs *ring, > struct intel_context *ctx); > + int __must_check (*emit_flush)(struct intel_engine_cs *ring, > + struct intel_context *ctx, > + u32 invalidate_domains, > + u32 flush_domains); Any reason to make this one __must_check but not the others? Brad > > /** > * List of objects currently involved in rendering from the > -- > 1.9.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx