On Thu, 09 Jan 2014, akash.goel@xxxxxxxxx wrote: > From: akashgoe <akash.goel@xxxxxxxxx> > > The following changes leads to stable behavior, especially > when playing 3D Apps, benchmarks. > > Added 4 new rendering specific Workarounds > 1. WaTlbInvalidateStoreDataBefore :- > Before pipecontrol with TLB invalidate set, > need 2 store data commands > 2. WaReadAfterWriteHazard :- > Send 8 store dword commands after flush > for read after write hazard > (HSD Gen6 bug_de 3047871) > 3. WaVSThreadDispatchOverride > Performance optimization - Hw will > decide which half slice the thread > will dispatch, May not be really needed > for VLV, as its single slice > 4. WaDisable_RenderCache_OperationalFlush > Operational flush cannot be enabled on > BWG A0 [Errata BWT006] > > Removed 3 workarounds as not needed for VLV+(B0 onwards) > 1. WaDisableRHWOOptimizationForRenderHang > 2. WaDisableL3CacheAging > 3. WaDisableDopClockGating > > Modified the implementation of 1 workaround > 1. WaDisableL3Bank2xClockGate > Disabling L3 clock gating- MMIO 940c[25] = 1 > > Modified the programming of 2 registers in render ring init function > 1. GFX_MODE_GEN7 (Enabling TLB invalidate) > 2. MI_MODE (Enabling MI Flush) Frankly, I don't know much about these particular workarounds, but for bisecting any regressions it would probably be a good idea to split the patch per workaround touched. BR, Jani. > > Change-Id: Ib60f8dc7d2213552e498d588e1bba7eaa1a921ed > Signed-off-by: akashgoe <akash.goel@xxxxxxxxx> > Signed-off-by: Akash Goel <akash.goel@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_reg.h | 3 ++ > drivers/gpu/drm/i915/intel_pm.c | 32 +++++++++++------- > drivers/gpu/drm/i915/intel_ringbuffer.c | 58 ++++++++++++++++++++++++++++++--- > 3 files changed, 77 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index a699efd..d829754 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -934,6 +934,9 @@ > #define ECO_GATING_CX_ONLY (1<<3) > #define ECO_FLIP_DONE (1<<0) > > +#define GEN7_CACHE_MODE_0 0x07000 /* IVB+ only */ > +#define GEN7_RC_OP_FLUSH_ENABLE (1<<0) > + > #define CACHE_MODE_1 0x7004 /* IVB+ */ > #define PIXEL_SUBSPAN_COLLECT_OPT_DISABLE (1<<6) > > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index 469170c..e4d220c 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -4947,22 +4947,18 @@ static void valleyview_init_clock_gating(struct drm_device *dev) > _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | > GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); > > - /* Apply the WaDisableRHWOOptimizationForRenderHang:vlv workaround. */ > - I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, > - GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); > - > - /* WaApplyL3ControlAndL3ChickenMode:vlv */ > - I915_WRITE(GEN7_L3CNTLREG1, I915_READ(GEN7_L3CNTLREG1) | GEN7_L3AGDIS); > I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE); > > + /* WaDisable_RenderCache_OperationalFlush > + * Clear bit 0, so we do a AND with the mask > + * to keep other bits the same */ > + I915_WRITE(GEN7_CACHE_MODE_0, (I915_READ(GEN7_CACHE_MODE_0) | > + _MASKED_BIT_DISABLE(GEN7_RC_OP_FLUSH_ENABLE))); > + > /* WaForceL3Serialization:vlv */ > I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & > ~L3SQ_URB_READ_CAM_MATCH_DISABLE); > > - /* WaDisableDopClockGating:vlv */ > - I915_WRITE(GEN7_ROW_CHICKEN2, > - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); > - > /* This is required by WaCatErrorRejectionIssue:vlv */ > I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, > I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | > @@ -4991,10 +4987,24 @@ static void valleyview_init_clock_gating(struct drm_device *dev) > GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | > GEN6_RCCUNIT_CLOCK_GATE_DISABLE); > > - I915_WRITE(GEN7_UCGCTL4, GEN7_L3BANK2X_CLOCK_GATE_DISABLE); > + /* WaDisableL3Bank2xClockGate > + * Disabling L3 clock gating- MMIO 940c[25] = 1 > + * Set bit 25, to disable L3_BANK_2x_CLK_GATING */ > + I915_WRITE(GEN7_UCGCTL4, > + I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); > > I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); > > + /* WaVSThreadDispatchOverride > + * Hw will decide which half slice the thread will dispatch. > + * May not be needed for VLV, as its a single slice */ > + I915_WRITE(GEN7_CACHE_MODE_0, > + I915_READ(GEN7_FF_THREAD_MODE) & > + (~GEN7_FF_VS_SCHED_LOAD_BALANCE)); > + > + /* WaDisable4x2SubspanOptimization, > + * Disable combining of two 2x2 subspans into a 4x2 subspan > + * Set chicken bit to disable subspan optimization */ > I915_WRITE(CACHE_MODE_1, > _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 442c9a6..c9350a1 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -563,7 +563,9 @@ static int init_render_ring(struct intel_ring_buffer *ring) > int ret = init_ring_common(ring); > > if (INTEL_INFO(dev)->gen > 3) > - I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); > + if (!IS_VALLEYVIEW(dev)) > + I915_WRITE(MI_MODE, > + _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); > > /* We need to disable the AsyncFlip performance optimisations in order > * to use MI_WAIT_FOR_EVENT within the CS. It should already be > @@ -579,10 +581,17 @@ static int init_render_ring(struct intel_ring_buffer *ring) > I915_WRITE(GFX_MODE, > _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS)); > > - if (IS_GEN7(dev)) > - I915_WRITE(GFX_MODE_GEN7, > - _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) | > - _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); > + if (IS_GEN7(dev)) { > + if (IS_VALLEYVIEW(dev)) { > + I915_WRITE(GFX_MODE_GEN7, > + _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); > + I915_WRITE(MI_MODE, I915_READ(MI_MODE) | > + _MASKED_BIT_ENABLE(MI_FLUSH_ENABLE)); > + } else > + I915_WRITE(GFX_MODE_GEN7, > + _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) | > + _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); > + } > > if (INTEL_INFO(dev)->gen >= 5) { > ret = init_pipe_control(ring); > @@ -2157,6 +2166,7 @@ int > intel_ring_flush_all_caches(struct intel_ring_buffer *ring) > { > int ret; > + int i; > > if (!ring->gpu_caches_dirty) > return 0; > @@ -2165,6 +2175,26 @@ intel_ring_flush_all_caches(struct intel_ring_buffer *ring) > if (ret) > return ret; > > + /* WaReadAfterWriteHazard*/ > + /* Send a number of Store Data commands here to finish > + flushing hardware pipeline.This is needed in the case > + where the next workload tries reading from the same > + surface that this batch writes to. Without these StoreDWs, > + not all of the data will actually be flushd to the surface > + by the time the next batch starts reading it, possibly > + causing a small amount of corruption.*/ > + ret = intel_ring_begin(ring, 4 * 12); > + if (ret) > + return ret; > + for (i = 0; i < 12; i++) { > + intel_ring_emit(ring, MI_STORE_DWORD_INDEX); > + intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << > + MI_STORE_DWORD_INDEX_SHIFT); > + intel_ring_emit(ring, 0); > + intel_ring_emit(ring, MI_NOOP); > + } > + intel_ring_advance(ring); > + > trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS); > > ring->gpu_caches_dirty = false; > @@ -2176,6 +2206,24 @@ intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring) > { > uint32_t flush_domains; > int ret; > + int i; > + > + /* WaTlbInvalidateStoreDataBefore*/ > + /* Before pipecontrol with TLB invalidate set, need 2 store > + data commands (such as MI_STORE_DATA_IMM or MI_STORE_DATA_INDEX) > + Without this, hardware cannot guarantee the command after the > + PIPE_CONTROL with TLB inv will not use the old TLB values.*/ > + ret = intel_ring_begin(ring, 4 * 2); > + if (ret) > + return ret; > + for (i = 0; i < 2; i++) { > + intel_ring_emit(ring, MI_STORE_DWORD_INDEX); > + intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << > + MI_STORE_DWORD_INDEX_SHIFT); > + intel_ring_emit(ring, 0); > + intel_ring_emit(ring, MI_NOOP); > + } > + intel_ring_advance(ring); > > flush_domains = 0; > if (ring->gpu_caches_dirty) > -- > 1.8.5.2 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Jani Nikula, Intel Open Source Technology Center _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx