On Thu, 22 Jan 2015, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote: > This looked like an odd regression from > > commit ec5cc0f9b019af95e4571a9fa162d94294c8d90b > Author: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Date: Thu Jun 12 10:28:55 2014 +0100 > > drm/i915: Restrict GPU boost to the RCS engine > > but in reality it uncovered a much older coherency bug. The issue that > boosting the GPU frequency on the BCS ring was masking was that we could > wake the CPU up after completion of a BCS batch and inspect memory prior > to the write cache being fully evicted. In order to serialise the > breadcrumb interrupt (and so ensure that the CPU's view of memory is > coherent) we need to perform a post-sync operation in the MI_FLUSH_DW. > > v2: Fix all the MI_FLUSH_DW (bsd plus the duplication in execlists). > > Testcase: gpuX-rcs-gpu-read-after-write > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: stable@xxxxxxxxxxxxxxx > Acked-by: Daniel Vetter <daniel@xxxxxxxx> > --- > drivers/gpu/drm/i915/intel_lrc.c | 20 +++++++++++--------- > drivers/gpu/drm/i915/intel_ringbuffer.c | 23 +++++++++++++++++++---- > 2 files changed, 30 insertions(+), 13 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index e405b61cdac5..8e71d8851c9a 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -1237,15 +1237,17 @@ static int gen8_emit_flush(struct intel_ringbuffer *ringbuf, > > cmd = MI_FLUSH_DW + 1; > > - if (ring == &dev_priv->ring[VCS]) { > - if (invalidate_domains & I915_GEM_GPU_DOMAINS) > - cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD | > - MI_FLUSH_DW_STORE_INDEX | > - MI_FLUSH_DW_OP_STOREDW; > - } else { > - if (invalidate_domains & I915_GEM_DOMAIN_RENDER) > - cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX | > - MI_FLUSH_DW_OP_STOREDW; > + /* We always require a command barrier so that subsequent > + * commands, such as breadcrumb interrupts, are strictly ordered > + * wrt the 
contents of the write cache being flushed to memory > + * (and thus being coherent from the CPU). > + */ > + cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; > + > + if (invalidate_domains & I915_GEM_GPU_DOMAINS) { Why do you change the mask from I915_GEM_DOMAIN_RENDER to I915_GEM_GPU_DOMAINS for ring != VCS? BR, Jani. > + cmd |= MI_INVALIDATE_TLB; > + if (ring == &dev_priv->ring[VCS]) > + cmd |= MI_INVALIDATE_BSD; > } > > intel_logical_ring_emit(ringbuf, cmd); > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 23020d67329b..718530fd6c6b 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -2224,6 +2224,14 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring, > cmd = MI_FLUSH_DW; > if (INTEL_INFO(ring->dev)->gen >= 8) > cmd += 1; > + > + /* We always require a command barrier so that subsequent > + * commands, such as breadcrumb interrupts, are strictly ordered > + * wrt the contents of the write cache being flushed to memory > + * (and thus being coherent from the CPU). > + */ > + cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; > + > /* > * Bspec vol 1c.5 - video engine command streamer: > * "If ENABLED, all TLBs will be invalidated once the flush > @@ -2231,8 +2239,8 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring, > * Post-Sync Operation field is a value of 1h or 3h." 
> */ > if (invalidate & I915_GEM_GPU_DOMAINS) > - cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD | > - MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; > + cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD; > + > intel_ring_emit(ring, cmd); > intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); > if (INTEL_INFO(ring->dev)->gen >= 8) { > @@ -2328,6 +2336,14 @@ static int gen6_ring_flush(struct intel_engine_cs *ring, > cmd = MI_FLUSH_DW; > if (INTEL_INFO(ring->dev)->gen >= 8) > cmd += 1; > + > + /* We always require a command barrier so that subsequent > + * commands, such as breadcrumb interrupts, are strictly ordered > + * wrt the contents of the write cache being flushed to memory > + * (and thus being coherent from the CPU). > + */ > + cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; > + > /* > * Bspec vol 1c.3 - blitter engine command streamer: > * "If ENABLED, all TLBs will be invalidated once the flush > @@ -2335,8 +2351,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring, > * Post-Sync Operation field is a value of 1h or 3h." > */ > if (invalidate & I915_GEM_DOMAIN_RENDER) > - cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX | > - MI_FLUSH_DW_OP_STOREDW; > + cmd |= MI_INVALIDATE_TLB; > intel_ring_emit(ring, cmd); > intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); > if (INTEL_INFO(ring->dev)->gen >= 8) { > -- > 2.1.4 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Jani Nikula, Intel Open Source Technology Center _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx