On Fri, Aug 08, 2014 at 10:52:57AM +0100, arun.siluvery@xxxxxxxxxxxxxxx wrote: > From: Arun Siluvery <arun.siluvery@xxxxxxxxxxxxxxx> > > Workarounds for bdw are currently applied in init_clock_gating() but they > are lost following a gpu reset. Some of the registers are part of register > state context and they are restored with every context switch so initializing > WAs in golden render state ensures that they are applied even when we start > with an uninitialized context or during hw initialization followed by a reset. This approach might require separate null states for BDW vs. CHV and IVB vs. HSW vs. VLV, which seems a bit unfortunate. Might be better to just issue the w/a register writes via LRIs from the code as part of the null state load. Although I don't actually understand how this improves things as opposed to just applying the w/as via mmio writes. Does it? > > Signed-off-by: Arun Siluvery <arun.siluvery@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/intel_pm.c | 50 --------------------- > drivers/gpu/drm/i915/intel_renderstate_gen8.c | 62 +++++++++++++++++---------- > 2 files changed, 39 insertions(+), 73 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index 1ddd4df..ab64b64 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -5402,38 +5402,11 @@ static void gen8_init_clock_gating(struct drm_device *dev) > /* FIXME(BDW): Check all the w/a, some might only apply to > * pre-production hw. */ > > - /* WaDisablePartialInstShootdown:bdw */ > - I915_WRITE(GEN8_ROW_CHICKEN, > - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); > - > - /* WaDisableThreadStallDopClockGating:bdw */ > - /* FIXME: Unclear whether we really need this on production bdw. 
*/ > - I915_WRITE(GEN8_ROW_CHICKEN, > - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); > - > - /* > - * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for > - * pre-production hardware > - */ > - I915_WRITE(HALF_SLICE_CHICKEN3, > - _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS)); > - I915_WRITE(HALF_SLICE_CHICKEN3, > - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); > I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE)); > > I915_WRITE(_3D_CHICKEN3, > _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2))); > > - I915_WRITE(COMMON_SLICE_CHICKEN2, > - _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); > - > - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, > - _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); > - > - /* WaDisableDopClockGating:bdw May not be needed for production */ > - I915_WRITE(GEN7_ROW_CHICKEN2, > - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); > - > /* WaSwitchSolVfFArbitrationPriority:bdw */ > I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); > > @@ -5448,41 +5421,18 @@ static void gen8_init_clock_gating(struct drm_device *dev) > BDW_DPRS_MASK_VBLANK_SRD); > } > > - /* Use Force Non-Coherent whenever executing a 3D context. This is a > - * workaround for for a possible hang in the unlikely event a TLB > - * invalidation occurs during a PSD flush. > - */ > - I915_WRITE(HDC_CHICKEN0, > - I915_READ(HDC_CHICKEN0) | > - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)); > - > /* WaVSRefCountFullforceMissDisable:bdw */ > /* WaDSRefCountFullforceMissDisable:bdw */ > I915_WRITE(GEN7_FF_THREAD_MODE, > I915_READ(GEN7_FF_THREAD_MODE) & > ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); > > - /* > - * BSpec recommends 8x4 when MSAA is used, > - * however in practice 16x4 seems fastest. > - * > - * Note that PS/WM thread counts depend on the WIZ hashing > - * disable bit, which we don't touch here, but it's good > - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
> - */ > - I915_WRITE(GEN7_GT_MODE, > - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); > - > I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, > _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); > > /* WaDisableSDEUnitClockGating:bdw */ > I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | > GEN8_SDEUNIT_CLOCK_GATE_DISABLE); > - > - /* Wa4x4STCOptimizationDisable:bdw */ > - I915_WRITE(CACHE_MODE_1, > - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); > } > > static void haswell_init_clock_gating(struct drm_device *dev) > diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.c b/drivers/gpu/drm/i915/intel_renderstate_gen8.c > index 75ef1b5..0b26783 100644 > --- a/drivers/gpu/drm/i915/intel_renderstate_gen8.c > +++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.c > @@ -1,14 +1,38 @@ > #include "intel_renderstate.h" > > static const u32 gen8_null_state_relocs[] = { > - 0x00000048, > - 0x00000050, > - 0x00000060, > - 0x000003ec, > + 0x000000a8, > + 0x000000b0, > + 0x000000c0, > + 0x0000044c, > -1, > }; > > static const u32 gen8_null_state_batch[] = { > + 0x11000001, > + 0x0000e4f0, > + 0x83208320, > + 0x11000001, > + 0x0000e4f4, > + 0x00010001, > + 0x11000001, > + 0x0000e184, > + 0x01020102, > + 0x11000001, > + 0x0000e100, > + 0x04000400, > + 0x11000001, > + 0x00007014, > + 0x00010001, > + 0x11000001, > + 0x00007300, > + 0x00100010, > + 0x11000001, > + 0x00007004, > + 0x00400040, > + 0x11000001, > + 0x00007008, > + 0x02800200, > 0x69040000, > 0x61020001, > 0x00000000, > @@ -40,9 +64,9 @@ static const u32 gen8_null_state_batch[] = { > 0xfffff001, > 0x00001001, > 0x78230000, > - 0x000006e0, > + 0x00000720, > 0x78210000, > - 0x00000700, > + 0x00000740, > 0x78300000, > 0x08010040, > 0x78330000, > @@ -52,9 +76,9 @@ static const u32 gen8_null_state_batch[] = { > 0x78320000, > 0x08000000, > 0x78240000, > - 0x00000641, > + 0x00000681, > 0x780e0000, > - 0x00000601, > + 0x00000641, > 0x780d0000, > 0x00000000, > 0x78180000, > @@ -199,9 +223,9 @@ static const u32 
gen8_null_state_batch[] = { > 0x00000000, > 0x00000000, > 0x782a0000, > - 0x00000480, > + 0x000004c0, > 0x782f0000, > - 0x00000540, > + 0x00000580, > 0x78140000, > 0x00000800, > 0x78170009, > @@ -216,7 +240,7 @@ static const u32 gen8_null_state_batch[] = { > 0x00000000, > 0x00000000, > 0x7820000a, > - 0x00000580, > + 0x000005c0, > 0x00000000, > 0x08080000, > 0x00000000, > @@ -232,7 +256,7 @@ static const u32 gen8_null_state_batch[] = { > 0x784f0000, > 0x80000100, > 0x780f0000, > - 0x00000740, > + 0x00000780, > 0x78050006, > 0x00000000, > 0x00000000, > @@ -260,7 +284,7 @@ static const u32 gen8_null_state_batch[] = { > 0x00000000, > 0x78080003, > 0x00006000, > - 0x000005e0, /* reloc */ > + 0x00000620, /* reloc */ > 0x00000000, > 0x00000000, > 0x78090005, > @@ -289,16 +313,8 @@ static const u32 gen8_null_state_batch[] = { > 0x00000000, > 0x00000000, > 0x00000000, > - 0x00000000, > - 0x00000000, > - 0x00000000, > - 0x00000000, > - 0x00000000, > - 0x00000000, > - 0x00000000, > - 0x00000000, > - 0x000004c0, /* state start */ > - 0x00000500, > + 0x00000500, /* state start */ > + 0x00000540, > 0x00000000, > 0x00000000, > 0x00000000, > -- > 2.0.4 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Ville Syrjälä Intel OTC _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx