Re: [PATCH] drm/i915: Remove unnecessary memory quiescing for aux inval

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Sep 20, 2023 at 01:11:31PM +0200, Nirmoy Das wrote:
> i915 already does memory quiesce before signaling
> breadcrumb so remove extra memory quiescing for aux
> invalidation which can cause unnecessary side effects.

This explanation seems confusing to me.  If we've already performed the
necessary flushing and quiesced all cache<->memory traffic, then
performing another flush should just be a noop, right?  If we're seeing
side effects, then doesn't that imply that there was still something in
the cache that hadn't made its way to memory yet?

Is the breadcrumb code flushing all the necessary bits?  What about
PIPE_CONTROL_CCS_FLUSH?


Matt

> 
> Fixes: 78a6ccd65fa3 ("drm/i915/gt: Ensure memory quiesced before invalidation")
> Cc: Jonathan Cavitt <jonathan.cavitt@xxxxxxxxx>
> Cc: Andi Shyti <andi.shyti@xxxxxxxxxxxxxxx>
> Cc: <stable@xxxxxxxxxxxxxxx> # v5.8+
> Cc: Andrzej Hajda <andrzej.hajda@xxxxxxxxx>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> Cc: Matt Roper <matthew.d.roper@xxxxxxxxx>
> Cc: Tejas Upadhyay <tejas.upadhyay@xxxxxxxxx>
> Cc: Lucas De Marchi <lucas.demarchi@xxxxxxxxx>
> Cc: Prathap Kumar Valsan <prathap.kumar.valsan@xxxxxxxxx>
> Cc: Tapani Pälli <tapani.palli@xxxxxxxxx>
> Cc: Mark Janes <mark.janes@xxxxxxxxx>
> Cc: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx>
> Signed-off-by: Nirmoy Das <nirmoy.das@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 50 ++++++++++++------------
>  1 file changed, 26 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> index 0143445dba83..5001670046a0 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> @@ -248,11 +248,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
>  {
>  	struct intel_engine_cs *engine = rq->engine;
>  
> -	/*
> -	 * On Aux CCS platforms the invalidation of the Aux
> -	 * table requires quiescing memory traffic beforehand
> -	 */
> -	if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
> +	if (mode & EMIT_FLUSH) {
>  		u32 bit_group_0 = 0;
>  		u32 bit_group_1 = 0;
>  		int err;
> @@ -264,13 +260,6 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
>  
>  		bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
>  
> -		/*
> -		 * When required, in MTL and beyond platforms we
> -		 * need to set the CCS_FLUSH bit in the pipe control
> -		 */
> -		if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
> -			bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
> -
>  		bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
>  		bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
>  		bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
> @@ -800,14 +789,15 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
>  {
>  	struct drm_i915_private *i915 = rq->i915;
>  	struct intel_gt *gt = rq->engine->gt;
> -	u32 flags = (PIPE_CONTROL_CS_STALL |
> -		     PIPE_CONTROL_TLB_INVALIDATE |
> -		     PIPE_CONTROL_TILE_CACHE_FLUSH |
> -		     PIPE_CONTROL_FLUSH_L3 |
> -		     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> -		     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> -		     PIPE_CONTROL_DC_FLUSH_ENABLE |
> -		     PIPE_CONTROL_FLUSH_ENABLE);
> +	u32 bit_group_0 = PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
> +	u32 bit_group_1 = (PIPE_CONTROL_CS_STALL |
> +			   PIPE_CONTROL_TLB_INVALIDATE |
> +			   PIPE_CONTROL_TILE_CACHE_FLUSH |
> +			   PIPE_CONTROL_FLUSH_L3 |
> +			   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> +			   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +			   PIPE_CONTROL_DC_FLUSH_ENABLE |
> +			   PIPE_CONTROL_FLUSH_ENABLE);
>  
>  	/* Wa_14016712196 */
>  	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915))
> @@ -817,14 +807,26 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
>  
>  	if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
>  		/* Wa_1409600907 */
> -		flags |= PIPE_CONTROL_DEPTH_STALL;
> +		bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
>  
>  	if (!HAS_3D_PIPELINE(rq->i915))
> -		flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
> +		bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
>  	else if (rq->engine->class == COMPUTE_CLASS)
> -		flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
> +		bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
> +
> +	/*
> +	 * On Aux CCS platforms the invalidation of the Aux
> +	 * table requires quiescing memory traffic beforehand.
> +	 * gen12_emit_fini_breadcrumb_rcs() does this for us as
> +	 * all memory traffic has to be completed before we signal
> +	 * the breadcrumb. In MTL and beyond platforms, we need to also
> +	 * add CCS_FLUSH bit in the pipe control for that purpose.
> +	 */
> +
> +	if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
> +		bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
>  
> -	cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0);
> +	cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1, 0);
>  
>  	/*XXX: Look at gen8_emit_fini_breadcrumb_rcs */
>  	cs = gen12_emit_ggtt_write_rcs(cs,
> -- 
> 2.41.0
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation



[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux