On Wed, Feb 15, 2017 at 02:05:52PM +0000, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > > This leaves the ringbuff submission code in intel_ringbuffer.c > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > --- > drivers/gpu/drm/i915/intel_engine_cs.c | 834 ++++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_ringbuffer.c | 834 -------------------------------- > 2 files changed, 834 insertions(+), 834 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c > index 538d845d7251..afaedc3adc2e 100644 > --- a/drivers/gpu/drm/i915/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c > @@ -525,6 +525,840 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, > } > } > > +static int wa_add(struct drm_i915_private *dev_priv, > + i915_reg_t addr, > + const u32 mask, const u32 val) > +{ > + const u32 idx = dev_priv->workarounds.count; > + > + if (WARN_ON(idx >= I915_MAX_WA_REGS)) > + return -ENOSPC; > + > + dev_priv->workarounds.reg[idx].addr = addr; > + dev_priv->workarounds.reg[idx].value = val; > + dev_priv->workarounds.reg[idx].mask = mask; > + > + dev_priv->workarounds.count++; > + > + return 0; > +} > + > +#define WA_REG(addr, mask, val) do { \ > + const int r = wa_add(dev_priv, (addr), (mask), (val)); \ > + if (r) \ > + return r; \ > + } while (0) > + > +#define WA_SET_BIT_MASKED(addr, mask) \ > + WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) > + > +#define WA_CLR_BIT_MASKED(addr, mask) \ > + WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) > + > +#define WA_SET_FIELD_MASKED(addr, mask, value) \ > + WA_REG(addr, mask, _MASKED_FIELD(mask, value)) > + > +#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) > +#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) > + > +#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) > + > +static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, > + 
i915_reg_t reg) > +{ > + struct drm_i915_private *dev_priv = engine->i915; > + struct i915_workarounds *wa = &dev_priv->workarounds; > + const uint32_t index = wa->hw_whitelist_count[engine->id]; > + > + if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) > + return -EINVAL; > + > + WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), > + i915_mmio_reg_offset(reg)); > + wa->hw_whitelist_count[engine->id]++; > + > + return 0; > +} > + > +static int gen8_init_workarounds(struct intel_engine_cs *engine) > +{ > + struct drm_i915_private *dev_priv = engine->i915; > + > + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); > + > + /* WaDisableAsyncFlipPerfMode:bdw,chv */ > + WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); > + > + /* WaDisablePartialInstShootdown:bdw,chv */ > + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, > + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); > + > + /* Use Force Non-Coherent whenever executing a 3D context. This is a > + * workaround for a possible hang in the unlikely event a TLB > + * invalidation occurs during a PSD flush. > + */ > + /* WaForceEnableNonCoherent:bdw,chv */ > + /* WaHdcDisableFetchWhenMasked:bdw,chv */ > + WA_SET_BIT_MASKED(HDC_CHICKEN0, > + HDC_DONOT_FETCH_MEM_WHEN_MASKED | > + HDC_FORCE_NON_COHERENT); > + > + /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: > + * "The Hierarchical Z RAW Stall Optimization allows non-overlapping > + * polygons in the same 8x4 pixel/sample area to be processed without > + * stalling waiting for the earlier ones to write to Hierarchical Z > + * buffer." > + * > + * This optimization is off by default for BDW and CHV; turn it on. > + */ > + WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); > + > + /* Wa4x4STCOptimizationDisable:bdw,chv */ > + WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); > + > + /* > + * BSpec recommends 8x4 when MSAA is used, > + * however in practice 16x4 seems fastest.
> + * > + * Note that PS/WM thread counts depend on the WIZ hashing > + * disable bit, which we don't touch here, but it's good > + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). > + */ > + WA_SET_FIELD_MASKED(GEN7_GT_MODE, > + GEN6_WIZ_HASHING_MASK, > + GEN6_WIZ_HASHING_16x4); > + > + return 0; > +} > + > +static int bdw_init_workarounds(struct intel_engine_cs *engine) > +{ > + struct drm_i915_private *dev_priv = engine->i915; > + int ret; > + > + ret = gen8_init_workarounds(engine); > + if (ret) > + return ret; > + > + /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ Hmm. We still have pre-prod workarounds for BDW? Any volunteers to go through it all and clean things up? Or maybe someone already did and the patches are in some kind of a limbo? -- Ville Syrjälä Intel OTC _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx