Re: [PATCH 4/6] drm/i915: Build workaround list in ring initialization

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Sep 18, 2014 at 05:58:33PM +0300, Mika Kuoppala wrote:
> to disassociate workaround list init from the actual writing
> of values. This is needed as not all workarounds will be masked bit
> enables and we want full control on when the read part of RMW
> will happen.
> 
> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>

The piece imo still missing here is adding all the other (non-render
context) wa registers to the wa list. It looks like wa_add should be able
to cope, but I prefer we check that by e.g. reworking all gen7+
clock_gating wa to use this.

This would also mean that we need to make these functions non-static.
Might as well go nuts and extract most of the w/a functionality into a new
intel_wa.c with a bit of DOC: overview sections and the important
functions (intel_wa_add) having proper kerneldoc. Of course we can do this
as a follow-up once things settle a bit.

Otherwise I think this is going in the right direction.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c     |  18 +--
>  drivers/gpu/drm/i915/i915_drv.h         |  28 ++---
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 188 ++++++++++++++++++--------------
>  3 files changed, 129 insertions(+), 105 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 89b740b..c35c6ce 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2657,18 +2657,18 @@ static int i915_wa_registers(struct seq_file *m, void *unused)
>  
>  	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
>  
> -	seq_printf(m, "Workarounds applied: %d\n", dev_priv->num_wa_regs);
> -	for (i = 0; i < dev_priv->num_wa_regs; ++i) {
> +	seq_printf(m, "Workarounds applied: %d\n", dev_priv->workarounds.count);
> +	for (i = 0; i < dev_priv->workarounds.count; ++i) {
>  		u32 addr, mask;
>  
> -		addr = dev_priv->intel_wa_regs[i].addr;
> -		mask = dev_priv->intel_wa_regs[i].mask;
> -		dev_priv->intel_wa_regs[i].value = I915_READ(addr) | mask;
> -		if (dev_priv->intel_wa_regs[i].addr)
> +		addr = dev_priv->workarounds.reg[i].addr;
> +		mask = dev_priv->workarounds.reg[i].mask;
> +		dev_priv->workarounds.reg[i].value = I915_READ(addr) | mask;
> +		if (dev_priv->workarounds.reg[i].addr)
>  			seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
> -				   dev_priv->intel_wa_regs[i].addr,
> -				   dev_priv->intel_wa_regs[i].value,
> -				   dev_priv->intel_wa_regs[i].mask);
> +				   dev_priv->workarounds.reg[i].addr,
> +				   dev_priv->workarounds.reg[i].value,
> +				   dev_priv->workarounds.reg[i].mask);
>  	}
>  
>  	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 49b45ec..3087d5a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1445,6 +1445,20 @@ struct i915_frontbuffer_tracking {
>  	unsigned flip_bits;
>  };
>  
> +struct i915_wa_reg {
> +	u32 addr;
> +	u32 value;
> +	/* bitmask representing WA bits */
> +	u32 mask;
> +};
> +
> +#define I915_MAX_WA_REGS 16
> +
> +struct i915_workarounds {
> +	struct i915_wa_reg reg[I915_MAX_WA_REGS];
> +	u32 count;
> +};
> +
>  struct drm_i915_private {
>  	struct drm_device *dev;
>  	struct kmem_cache *slab;
> @@ -1587,19 +1601,7 @@ struct drm_i915_private {
>  	struct intel_shared_dpll shared_dplls[I915_NUM_PLLS];
>  	int dpio_phy_iosf_port[I915_NUM_PHYS_VLV];
>  
> -	/*
> -	 * workarounds are currently applied at different places and
> -	 * changes are being done to consolidate them so exact count is
> -	 * not clear at this point, use a max value for now.
> -	 */
> -#define I915_MAX_WA_REGS  16
> -	struct {
> -		u32 addr;
> -		u32 value;
> -		/* bitmask representing WA bits */
> -		u32 mask;
> -	} intel_wa_regs[I915_MAX_WA_REGS];
> -	u32 num_wa_regs;
> +	struct i915_workarounds workarounds;
>  
>  	/* Reclocking support */
>  	bool render_reclock_avail;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 46cd0f9..4f336e23 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -665,87 +665,113 @@ err:
>  	return ret;
>  }
>  
> -static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
> -				       u32 addr, u32 value)
> +static int intel_ring_workarounds_emit(struct intel_engine_cs *ring)
>  {
> +	int ret, i;
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct i915_workarounds *w = &dev_priv->workarounds;
>  
> -	if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS))
> -		return;
> +	if (WARN_ON(w->count == 0))
> +		return 0;
>  
> -	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> -	intel_ring_emit(ring, addr);
> -	intel_ring_emit(ring, value);
> +	ring->gpu_caches_dirty = true;
> +	ret = intel_ring_flush_all_caches(ring);
> +	if (ret)
> +		return ret;
>  
> -	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
> -	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
> -	/* value is updated with the status of remaining bits of this
> -	 * register when it is read from debugfs file
> -	 */
> -	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
> -	dev_priv->num_wa_regs++;
> +	ret = intel_ring_begin(ring, w->count * 3);
> +	if (ret)
> +		return ret;
> +
> +	for (i = 0; i < w->count; i++) {
> +		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> +		intel_ring_emit(ring, w->reg[i].addr);
> +		intel_ring_emit(ring, w->reg[i].value);
> +	}
> +
> +	intel_ring_advance(ring);
> +
> +	ring->gpu_caches_dirty = true;
> +	ret = intel_ring_flush_all_caches(ring);
> +	if (ret)
> +		return ret;
> +
> +	DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
>  
> -	return;
> +	return 0;
> +}
> +
> +static int wa_add(struct drm_i915_private *dev_priv,
> +		  const u32 addr, const u32 val, const u32 mask)
> +{
> +	const u32 idx = dev_priv->workarounds.count;
> +
> +	if (WARN_ON(idx >= I915_MAX_WA_REGS))
> +		return -ENOSPC;
> +
> +	dev_priv->workarounds.reg[idx].addr = addr;
> +	dev_priv->workarounds.reg[idx].value = val;
> +	dev_priv->workarounds.reg[idx].mask = mask;
> +
> +	dev_priv->workarounds.count++;
> +
> +	return 0;
>  }
>  
> +#define WA_REG(addr, val, mask) { \
> +		const int r = wa_add(dev_priv, (addr), (val), (mask)); \
> +		if (r) return r; }
> +
> +#define WA_SET_BIT_MASKED(addr, mask) WA_REG(addr, \
> +				    _MASKED_BIT_ENABLE(mask), (mask) & 0xffff)
> +
> +#define WA_CLR_BIT_MASKED(addr, mask) WA_REG(addr, \
> +				    _MASKED_BIT_DISABLE(mask), (mask) & 0xffff)
> +
> +#define WA_SET_BIT(addr, mask) WA_REG(addr, I915_READ(addr) | (mask), mask)
> +#define WA_CLR_BIT(addr, mask) WA_REG(addr, I915_READ(addr) & ~(mask), mask)
> +
> +#define WA_WRITE(addr, val) WA_REG(addr, val, 0xffffffff)
> +
>  static int bdw_init_workarounds(struct intel_engine_cs *ring)
>  {
> -	int ret;
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
> -	/*
> -	 * workarounds applied in this fn are part of register state context,
> -	 * they need to be re-initialized followed by gpu reset, suspend/resume,
> -	 * module reload.
> -	 */
> -	dev_priv->num_wa_regs = 0;
> -	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
> -
> -	/*
> -	 * update the number of dwords required based on the
> -	 * actual number of workarounds applied
> -	 */
> -	ret = intel_ring_begin(ring, 24);
> -	if (ret)
> -		return ret;
> -
>  	/* WaDisablePartialInstShootdown:bdw */
>  	/* WaDisableThreadStallDopClockGating:bdw */
> -	/* FIXME: Unclear whether we really need this on production bdw. */
> -	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
> -			   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
> -					     | STALL_DOP_GATING_DISABLE));
> +	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> +		  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
> +		  STALL_DOP_GATING_DISABLE);
>  
>  	/* WaDisableDopClockGating:bdw May not be needed for production */
> -	intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
> -			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
> +	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
> +		  DOP_CLOCK_GATING_DISABLE);
>  
>  	/*
>  	 * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
>  	 * pre-production hardware
>  	 */
> -	intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
> -			   _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS
> -					      | GEN8_SAMPLER_POWER_BYPASS_DIS));
> +	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
> +		  GEN8_CENTROID_PIXEL_OPT_DIS | GEN8_SAMPLER_POWER_BYPASS_DIS);
>  
> -	intel_ring_emit_wa(ring, GEN7_HALF_SLICE_CHICKEN1,
> -			   _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
> +	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
> +		  GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE);
>  
> -	intel_ring_emit_wa(ring, COMMON_SLICE_CHICKEN2,
> -			   _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
> +	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
> +		  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
>  
>  	/* Use Force Non-Coherent whenever executing a 3D context. This is a
>  	 * workaround for for a possible hang in the unlikely event a TLB
>  	 * invalidation occurs during a PSD flush.
>  	 */
> -	intel_ring_emit_wa(ring, HDC_CHICKEN0,
> -			   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
> +	WA_SET_BIT_MASKED(HDC_CHICKEN0,
> +		  HDC_FORCE_NON_COHERENT);
>  
>  	/* Wa4x4STCOptimizationDisable:bdw */
> -	intel_ring_emit_wa(ring, CACHE_MODE_1,
> -			   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
> +	WA_SET_BIT_MASKED(CACHE_MODE_1,
> +		  GEN8_4x4_STC_OPTIMIZATION_DISABLE);
>  
>  	/*
>  	 * BSpec recommends 8x4 when MSAA is used,
> @@ -755,52 +781,50 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
>  	 * disable bit, which we don't touch here, but it's good
>  	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
>  	 */
> -	intel_ring_emit_wa(ring, GEN7_GT_MODE,
> -			   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
> -
> -	intel_ring_advance(ring);
> -
> -	DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
> -			 dev_priv->num_wa_regs);
> +	WA_SET_BIT_MASKED(GEN7_GT_MODE,
> +		  GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
>  
>  	return 0;
>  }
>  
>  static int chv_init_workarounds(struct intel_engine_cs *ring)
>  {
> -	int ret;
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
> -	/*
> -	 * workarounds applied in this fn are part of register state context,
> -	 * they need to be re-initialized followed by gpu reset, suspend/resume,
> -	 * module reload.
> -	 */
> -	dev_priv->num_wa_regs = 0;
> -	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
> -
> -	ret = intel_ring_begin(ring, 12);
> -	if (ret)
> -		return ret;
> -
>  	/* WaDisablePartialInstShootdown:chv */
> -	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
> -			   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
> +	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> +		  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
>  
>  	/* WaDisableThreadStallDopClockGating:chv */
> -	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
> -			   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
> +	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> +		  STALL_DOP_GATING_DISABLE);
>  
>  	/* WaDisableDopClockGating:chv (pre-production hw) */
> -	intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
> -			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
> +	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
> +		  DOP_CLOCK_GATING_DISABLE);
>  
>  	/* WaDisableSamplerPowerBypass:chv (pre-production hw) */
> -	intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
> -			   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
> +	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
> +		  GEN8_SAMPLER_POWER_BYPASS_DIS);
>  
> -	intel_ring_advance(ring);
> +	return 0;
> +}
> +
> +static int init_workarounds_ring(struct intel_engine_cs *ring)
> +{
> +	struct drm_device *dev = ring->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	WARN_ON(ring->id != RCS);
> +
> +	dev_priv->workarounds.count = 0;
> +
> +	if (IS_BROADWELL(dev))
> +		return bdw_init_workarounds(ring);
> +
> +	if (IS_CHERRYVIEW(dev))
> +		return chv_init_workarounds(ring);
>  
>  	return 0;
>  }
> @@ -864,7 +888,7 @@ static int init_render_ring(struct intel_engine_cs *ring)
>  
>  	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
>  
> -	return ret;
> +	return init_workarounds_ring(ring);
>  }
>  
>  static void render_ring_cleanup(struct intel_engine_cs *ring)
> @@ -2305,10 +2329,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  					dev_priv->semaphore_obj = obj;
>  			}
>  		}
> -		if (IS_CHERRYVIEW(dev))
> -			ring->init_context = chv_init_workarounds;
> -		else
> -			ring->init_context = bdw_init_workarounds;
> +
> +		ring->init_context = intel_ring_workarounds_emit;
>  		ring->add_request = gen6_add_request;
>  		ring->flush = gen8_render_ring_flush;
>  		ring->irq_get = gen8_ring_get_irq;
> -- 
> 1.9.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux