On Thu, Sep 18, 2014 at 05:58:33PM +0300, Mika Kuoppala wrote:
> to disassociate workaround list init from the actual writing
> of values. This is needed as not all workarounds will be masked bit
> enables and we want full control over when the read part of the RMW
> will happen.
>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>

The piece imo still missing here is adding all the other (non-render-context)
w/a registers to the wa list. It looks like wa_add should be able to cope,
but I'd prefer we verify that by e.g. reworking all the gen7+ clock gating
w/a to use this (a rough, illustrative sketch is appended at the end of this
mail). That would also mean making these functions non-static. Might as well
go nuts and extract most of the w/a functionality into a new intel_wa.c, with
a DOC: overview section and proper kerneldoc for the important functions
(intel_wa_add).

Of course we can do this as a follow-up once things settle a bit.

Otherwise I think this is going in the right direction.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c     |  18 +--
>  drivers/gpu/drm/i915/i915_drv.h         |  28 ++---
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 188 ++++++++++++++++++--------------
>  3 files changed, 129 insertions(+), 105 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 89b740b..c35c6ce 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2657,18 +2657,18 @@ static int i915_wa_registers(struct seq_file *m, void *unused)
>
>          gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
>
> -        seq_printf(m, "Workarounds applied: %d\n", dev_priv->num_wa_regs);
> -        for (i = 0; i < dev_priv->num_wa_regs; ++i) {
> +        seq_printf(m, "Workarounds applied: %d\n", dev_priv->workarounds.count);
> +        for (i = 0; i < dev_priv->workarounds.count; ++i) {
>                  u32 addr, mask;
>
> -                addr = dev_priv->intel_wa_regs[i].addr;
> -                mask = dev_priv->intel_wa_regs[i].mask;
> -                dev_priv->intel_wa_regs[i].value = I915_READ(addr) | mask;
> -                if (dev_priv->intel_wa_regs[i].addr)
> +                addr = dev_priv->workarounds.reg[i].addr;
> +                mask = dev_priv->workarounds.reg[i].mask;
> +                dev_priv->workarounds.reg[i].value = I915_READ(addr) | mask;
> +                if (dev_priv->workarounds.reg[i].addr)
>                          seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
> -                                   dev_priv->intel_wa_regs[i].addr,
> -                                   dev_priv->intel_wa_regs[i].value,
> -                                   dev_priv->intel_wa_regs[i].mask);
> +                                   dev_priv->workarounds.reg[i].addr,
> +                                   dev_priv->workarounds.reg[i].value,
> +                                   dev_priv->workarounds.reg[i].mask);
>          }
>
>          gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 49b45ec..3087d5a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1445,6 +1445,20 @@ struct i915_frontbuffer_tracking {
>          unsigned flip_bits;
>  };
>
> +struct i915_wa_reg {
> +        u32 addr;
> +        u32 value;
> +        /* bitmask representing WA bits */
> +        u32 mask;
> +};
> +
> +#define I915_MAX_WA_REGS 16
> +
> +struct i915_workarounds {
> +        struct i915_wa_reg reg[I915_MAX_WA_REGS];
> +        u32 count;
> +};
> +
>  struct drm_i915_private {
>          struct drm_device *dev;
>          struct kmem_cache *slab;
> @@ -1587,19 +1601,7 @@ struct drm_i915_private {
>          struct intel_shared_dpll shared_dplls[I915_NUM_PLLS];
>          int dpio_phy_iosf_port[I915_NUM_PHYS_VLV];
>
> -        /*
> -         * workarounds are currently applied at different places and
> -         * changes are being done to consolidate them so exact count is
> -         * not clear at this point, use a max value for now.
> -         */
> -#define I915_MAX_WA_REGS 16
> -        struct {
> -                u32 addr;
> -                u32 value;
> -                /* bitmask representing WA bits */
> -                u32 mask;
> -        } intel_wa_regs[I915_MAX_WA_REGS];
> -        u32 num_wa_regs;
> +        struct i915_workarounds workarounds;
>
>          /* Reclocking support */
>          bool render_reclock_avail;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 46cd0f9..4f336e23 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -665,87 +665,113 @@ err:
>          return ret;
>  }
>
> -static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
> -                                      u32 addr, u32 value)
> +static int intel_ring_workarounds_emit(struct intel_engine_cs *ring)
>  {
> +        int ret, i;
>          struct drm_device *dev = ring->dev;
>          struct drm_i915_private *dev_priv = dev->dev_private;
> +        struct i915_workarounds *w = &dev_priv->workarounds;
>
> -        if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS))
> -                return;
> +        if (WARN_ON(w->count == 0))
> +                return 0;
>
> -        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> -        intel_ring_emit(ring, addr);
> -        intel_ring_emit(ring, value);
> +        ring->gpu_caches_dirty = true;
> +        ret = intel_ring_flush_all_caches(ring);
> +        if (ret)
> +                return ret;
>
> -        dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
> -        dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
> -        /* value is updated with the status of remaining bits of this
> -         * register when it is read from debugfs file
> -         */
> -        dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
> -        dev_priv->num_wa_regs++;
> +        ret = intel_ring_begin(ring, w->count * 3);
> +        if (ret)
> +                return ret;
> +
> +        for (i = 0; i < w->count; i++) {
> +                intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> +                intel_ring_emit(ring, w->reg[i].addr);
> +                intel_ring_emit(ring, w->reg[i].value);
> +        }
> +
> +        intel_ring_advance(ring);
> +
> +        ring->gpu_caches_dirty = true;
> +        ret = intel_ring_flush_all_caches(ring);
> +        if (ret)
> +                return ret;
> +
> +        DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
>
> -        return;
> +        return 0;
> +}
> +
> +static int wa_add(struct drm_i915_private *dev_priv,
> +                  const u32 addr, const u32 val, const u32 mask)
> +{
> +        const u32 idx = dev_priv->workarounds.count;
> +
> +        if (WARN_ON(idx >= I915_MAX_WA_REGS))
> +                return -ENOSPC;
> +
> +        dev_priv->workarounds.reg[idx].addr = addr;
> +        dev_priv->workarounds.reg[idx].value = val;
> +        dev_priv->workarounds.reg[idx].mask = mask;
> +
> +        dev_priv->workarounds.count++;
> +
> +        return 0;
>  }
>
> +#define WA_REG(addr, val, mask) { \
> +                const int r = wa_add(dev_priv, (addr), (val), (mask)); \
> +                if (r) return r; }
> +
> +#define WA_SET_BIT_MASKED(addr, mask) WA_REG(addr, \
> +                _MASKED_BIT_ENABLE(mask), (mask) & 0xffff)
> +
> +#define WA_CLR_BIT_MASKED(addr, mask) WA_REG(addr, \
> +                _MASKED_BIT_DISABLE(mask), (mask) & 0xffff)
> +
> +#define WA_SET_BIT(addr, mask) WA_REG(addr, I915_READ(addr) | (mask), mask)
> +#define WA_CLR_BIT(addr, mask) WA_REG(addr, I915_READ(addr) & ~(mask), mask)
> +
> +#define WA_WRITE(addr, val) WA_REG(addr, val, 0xffffffff)
> +
>  static int bdw_init_workarounds(struct intel_engine_cs *ring)
>  {
> -        int ret;
>          struct drm_device *dev = ring->dev;
>          struct drm_i915_private *dev_priv = dev->dev_private;
>
> -        /*
> -         * workarounds applied in this fn are part of register state context,
> -         * they need to be re-initialized followed by gpu reset, suspend/resume,
> -         * module reload.
> -         */
> -        dev_priv->num_wa_regs = 0;
> -        memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
> -
> -        /*
> -         * update the number of dwords required based on the
> -         * actual number of workarounds applied
> -         */
> -        ret = intel_ring_begin(ring, 24);
> -        if (ret)
> -                return ret;
> -
>          /* WaDisablePartialInstShootdown:bdw */
>          /* WaDisableThreadStallDopClockGating:bdw */
> -        /* FIXME: Unclear whether we really need this on production bdw. */
> -        intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
> -                           _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
> -                                              | STALL_DOP_GATING_DISABLE));
> +        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> +                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
> +                          STALL_DOP_GATING_DISABLE);
>
>          /* WaDisableDopClockGating:bdw May not be needed for production */
> -        intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
> -                           _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
> +        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
> +                          DOP_CLOCK_GATING_DISABLE);
>
>          /*
>           * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
>           * pre-production hardware
>           */
> -        intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
> -                           _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS
> -                                              | GEN8_SAMPLER_POWER_BYPASS_DIS));
> +        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
> +                          GEN8_CENTROID_PIXEL_OPT_DIS | GEN8_SAMPLER_POWER_BYPASS_DIS);
>
> -        intel_ring_emit_wa(ring, GEN7_HALF_SLICE_CHICKEN1,
> -                           _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
> +        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
> +                          GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE);
>
> -        intel_ring_emit_wa(ring, COMMON_SLICE_CHICKEN2,
> -                           _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
> +        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
> +                          GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
>
>          /* Use Force Non-Coherent whenever executing a 3D context. This is a
>           * workaround for for a possible hang in the unlikely event a TLB
>           * invalidation occurs during a PSD flush.
>           */
> -        intel_ring_emit_wa(ring, HDC_CHICKEN0,
> -                           _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
> +        WA_SET_BIT_MASKED(HDC_CHICKEN0,
> +                          HDC_FORCE_NON_COHERENT);
>
>          /* Wa4x4STCOptimizationDisable:bdw */
> -        intel_ring_emit_wa(ring, CACHE_MODE_1,
> -                           _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
> +        WA_SET_BIT_MASKED(CACHE_MODE_1,
> +                          GEN8_4x4_STC_OPTIMIZATION_DISABLE);
>
>          /*
>           * BSpec recommends 8x4 when MSAA is used,
> @@ -755,52 +781,50 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
>           * disable bit, which we don't touch here, but it's good
>           * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
>           */
> -        intel_ring_emit_wa(ring, GEN7_GT_MODE,
> -                           GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
> -
> -        intel_ring_advance(ring);
> -
> -        DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
> -                         dev_priv->num_wa_regs);
> +        WA_SET_BIT_MASKED(GEN7_GT_MODE,
> +                          GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
>
>          return 0;
>  }
>
>  static int chv_init_workarounds(struct intel_engine_cs *ring)
>  {
> -        int ret;
>          struct drm_device *dev = ring->dev;
>          struct drm_i915_private *dev_priv = dev->dev_private;
>
> -        /*
> -         * workarounds applied in this fn are part of register state context,
> -         * they need to be re-initialized followed by gpu reset, suspend/resume,
> -         * module reload.
> -         */
> -        dev_priv->num_wa_regs = 0;
> -        memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
> -
> -        ret = intel_ring_begin(ring, 12);
> -        if (ret)
> -                return ret;
> -
>          /* WaDisablePartialInstShootdown:chv */
> -        intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
> -                           _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
> +        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> +                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
>
>          /* WaDisableThreadStallDopClockGating:chv */
> -        intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
> -                           _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
> +        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> +                          STALL_DOP_GATING_DISABLE);
>
>          /* WaDisableDopClockGating:chv (pre-production hw) */
> -        intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
> -                           _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
> +        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
> +                          DOP_CLOCK_GATING_DISABLE);
>
>          /* WaDisableSamplerPowerBypass:chv (pre-production hw) */
> -        intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
> -                           _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
> +        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
> +                          GEN8_SAMPLER_POWER_BYPASS_DIS);
>
> -        intel_ring_advance(ring);
> +        return 0;
> +}
> +
> +static int init_workarounds_ring(struct intel_engine_cs *ring)
> +{
> +        struct drm_device *dev = ring->dev;
> +        struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +        WARN_ON(ring->id != RCS);
> +
> +        dev_priv->workarounds.count = 0;
> +
> +        if (IS_BROADWELL(dev))
> +                return bdw_init_workarounds(ring);
> +
> +        if (IS_CHERRYVIEW(dev))
> +                return chv_init_workarounds(ring);
>
>          return 0;
>  }
> @@ -864,7 +888,7 @@ static int init_render_ring(struct intel_engine_cs *ring)
>
>          gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
>
> -        return ret;
> +        return init_workarounds_ring(ring);
>  }
>
>  static void render_ring_cleanup(struct intel_engine_cs *ring)
> @@ -2305,10 +2329,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>                          dev_priv->semaphore_obj = obj;
>                  }
>          }
> -        if (IS_CHERRYVIEW(dev))
> -                ring->init_context = chv_init_workarounds;
> -        else
> -                ring->init_context = bdw_init_workarounds;
> +
> +        ring->init_context = intel_ring_workarounds_emit;
>          ring->add_request = gen6_add_request;
>          ring->flush = gen8_render_ring_flush;
>          ring->irq_get = gen8_ring_get_irq;
> --
> 1.9.1

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
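
Purely as an illustration of the clock gating rework mentioned above (a
sketch, not part of the patch): assuming wa_add() and the WA_SET_BIT()/
WA_CLR_BIT() helpers are made non-static and exposed, e.g. through a
hypothetical intel_wa.c/intel_wa.h, a gen8 clock gating workaround that today
is an I915_READ/I915_WRITE RMW in gen8_init_clock_gating() could be fed into
the same list. The function name below is made up, and how such non-context
registers then get applied (direct mmio vs. ring LRI) is deliberately left
open here.

/* Sketch only -- assumes wa_add() and the WA_* helpers are exported. */
int intel_wa_init_clock_gating_bdw(struct drm_i915_private *dev_priv)
{
        /*
         * WaDisableSDEUnitClockGating:bdw -- a set-bits RMW today; with
         * WA_SET_BIT() the read happens while the list is built and the
         * write whenever the list is applied.
         */
        WA_SET_BIT(GEN8_UCGCTL6, GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

        /*
         * WaVSRefCountFullforceMissDisable:bdw and
         * WaDSRefCountFullforceMissDisable:bdw -- a clear-bits RMW today.
         */
        WA_CLR_BIT(GEN7_FF_THREAD_MODE,
                   GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME);

        return 0;
}

The list-building side stays identical to the render context workarounds; only
the point at which the stored value gets written out differs, which is exactly
why splitting the read part of the RMW from the write matters.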