This rework is based on suggestion from Chris. Now w/a are organized in an array and all of them are emitted in single fn instead of sending them individually. This approach is clean and new w/a can be added with minimal changes. The same array can be used when exporting them to debugfs and the temporary array in the current implementation is not required. v2: As suggested by Damien a new w/a reg definition is added. This helps in initializing w/a that are unmasked registers. Signed-off-by: Arun Siluvery <arun.siluvery@xxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/intel_ringbuffer.c | 158 ++++++++++++++------------------ drivers/gpu/drm/i915/intel_ringbuffer.h | 26 ++++++ 2 files changed, 93 insertions(+), 91 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index c5e4dc7..3ed0ad5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -650,87 +650,80 @@ err: return ret; } -static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, - u32 addr, u32 value) +static int i915_request_emit_lri(struct intel_engine_cs *ring, + int num_registers, + const struct intel_wa_reg *lri_list) { - struct drm_device *dev = ring->dev; - struct drm_i915_private *dev_priv = dev->dev_private; + int i; + int ret; + struct drm_i915_private *dev_priv = ring->dev->dev_private; - if (dev_priv->num_wa_regs > I915_MAX_WA_REGS) - return; + ret = intel_ring_begin(ring, (2 * num_registers + 1)); + if (ret) + return ret; - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit(ring, addr); - intel_ring_emit(ring, value); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_registers)); + for (i = 0; i < num_registers; ++i) { + u32 value; + const struct intel_wa_reg *p = (lri_list + i); - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr; - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = (value) & 0xFFFF; - /* value is updated with the status of remaining bits of this - * register when it is read from debugfs file - */ - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value; - dev_priv->num_wa_regs++; + if (p->masked_register) + value = (p->mask << 16) | p->value; + else + value = (I915_READ(p->addr) & ~p->mask) | p->value; + intel_ring_emit(ring, p->addr); + intel_ring_emit(ring, value); + } - return; + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + + return 0; } -static int bdw8_init_workarounds(struct intel_engine_cs *ring) -{ - int ret; - struct drm_device *dev = ring->dev; - struct drm_i915_private *dev_priv = dev->dev_private; +static const struct intel_wa_reg bdw_ring_init_context[] = { /* * workarounds applied in this fn are part of register state context, * they need to be re-initialized followed by gpu reset, suspend/resume, * module reload. */ - dev_priv->num_wa_regs = 0; - memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs)); - - /* - * update the number of dwords required based on the - * actual number of workarounds applied - */ - ret = intel_ring_begin(ring, 24); - if (ret) - return ret; /* WaDisablePartialInstShootdown:bdw */ /* WaDisableThreadStallDopClockGating:bdw */ /* FIXME: Unclear whether we really need this on production bdw. */ - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE - | STALL_DOP_GATING_DISABLE)); + INIT_MASKED_WA(GEN8_ROW_CHICKEN, + _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE) | + _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)), /* WaDisableDopClockGating:bdw May not be needed for production */ - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + INIT_MASKED_WA(GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)), /* * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for * pre-production hardware */ - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS - | GEN8_SAMPLER_POWER_BYPASS_DIS)); + INIT_MASKED_WA(HALF_SLICE_CHICKEN3, + _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS) | + _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)), - intel_ring_emit_wa(ring, GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); + INIT_MASKED_WA(GEN7_HALF_SLICE_CHICKEN1, + _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)), - intel_ring_emit_wa(ring, COMMON_SLICE_CHICKEN2, - _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); + INIT_MASKED_WA(COMMON_SLICE_CHICKEN2, + _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)), /* Use Force Non-Coherent whenever executing a 3D context. This is a * workaround for for a possible hang in the unlikely event a TLB * invalidation occurs during a PSD flush. */ - intel_ring_emit_wa(ring, HDC_CHICKEN0, - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)); + INIT_MASKED_WA(HDC_CHICKEN0, + _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)), /* Wa4x4STCOptimizationDisable:bdw */ - intel_ring_emit_wa(ring, CACHE_MODE_1, - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); + INIT_MASKED_WA(CACHE_MODE_1, + _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)), /* * BSpec recommends 8x4 when MSAA is used, @@ -740,54 +733,40 @@ static int bdw8_init_workarounds(struct intel_engine_cs *ring) * disable bit, which we don't touch here, but it's good * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ - intel_ring_emit_wa(ring, GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); - - intel_ring_advance(ring); - - DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n", - dev_priv->num_wa_regs); - - return 0; -} - -static int chv_init_workarounds(struct intel_engine_cs *ring) -{ - int ret; - struct drm_device *dev = ring->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - - /* - * workarounds applied in this fn are part of register state context, - * they need to be re-initialized followed by gpu reset, suspend/resume, - * module reload. - */ - dev_priv->num_wa_regs = 0; - memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs)); - - ret = intel_ring_begin(ring, 12); - if (ret) - return ret; + INIT_MASKED_WA(GEN7_GT_MODE, + (GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4)), +}; +static const struct intel_wa_reg chv_ring_init_context[] = { /* WaDisablePartialInstShootdown:chv */ - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); - /* WaDisableThreadStallDopClockGating:chv */ - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); + INIT_MASKED_WA(GEN8_ROW_CHICKEN, + _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE) | + _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)), /* WaDisableDopClockGating:chv (pre-production hw) */ - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + INIT_MASKED_WA(GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)), /* WaDisableSamplerPowerBypass:chv (pre-production hw) */ - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); + INIT_MASKED_WA(HALF_SLICE_CHICKEN3, + _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)), +}; - intel_ring_advance(ring); +static int ring_init_context(struct intel_engine_cs *ring) +{ + int ret = -EINVAL; - return 0; + if (IS_CHERRYVIEW(ring->dev)) + ret = i915_request_emit_lri(ring, + ARRAY_SIZE(chv_ring_init_context), + chv_ring_init_context); + else + ret = i915_request_emit_lri(ring, + ARRAY_SIZE(bdw_ring_init_context), + bdw_ring_init_context); + + return ret; } static int init_render_ring(struct intel_engine_cs *ring) @@ -2275,10 +2254,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) dev_priv->semaphore_obj = obj; } } - if (IS_CHERRYVIEW(dev)) - ring->init_context = chv_init_workarounds; - else - ring->init_context = bdw_init_workarounds; + ring->init_context = ring_init_context; ring->add_request = gen6_add_request; ring->flush = gen8_render_ring_flush; ring->irq_get = gen8_ring_get_irq; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 96479c8..c9ed06c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -317,6 +317,32 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; +/* + * Workaround register definition + * mask: bits correponding to the w/a + * value: represents reference values of w/a bits + */ +struct intel_wa_reg { + u32 masked_register : 1; + u32 addr; + u32 mask; + u32 value; +}; + +#define INIT_WA_REG(_masked, _addr, _mask, _value) \ +{ \ + .masked_register = _masked, \ + .addr = _addr, \ + .mask = _mask, \ + .value = _value,\ +} + +#define INIT_MASKED_WA(_addr, _value) \ + INIT_WA_REG(1, _addr, ((_value) >> 16), ((_value) & 0xFFFF)) + +#define INIT_UNMASKED_WA(_addr, _value) \ + INIT_WA_REG(0, _addr, ~(_value), _value) + bool intel_ring_initialized(struct intel_engine_cs *ring); static inline unsigned -- 2.0.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx