Re: [PATCH 16/55] drm/i915: Remove obsolete engine->gpu_caches_dirty

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, 2016-07-25 at 18:31 +0100, Chris Wilson wrote:
> Space for flushing the GPU cache prior to completing the request is
> preallocated, so emitting that flush cannot fail - the GPU caches will
> always be flushed along with the completed request. This means we no
> longer have to track whether the GPU cache is dirty between batches like
> we had to with the outstanding_lazy_seqno.
> 
> With the removal of the duplication in the per-backend entry points for
> emitting the obsolete lazy flush, we can then further unify the legacy
> engine->flush and the execlists engine->emit_flush into a single
> engine->emit_flush.
> 
> v2: Expand a bit on the legacy of gpu_caches_dirty
> 
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>

> Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-18-git-send-email-chris@xxxxxxxxxxxxxxxxxx
> ---
>  drivers/gpu/drm/i915/i915_gem_context.c    |  2 +-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  9 +---
>  drivers/gpu/drm/i915/i915_gem_gtt.c        | 11 +++--
>  drivers/gpu/drm/i915/i915_gem_request.c    |  8 ++--
>  drivers/gpu/drm/i915/intel_lrc.c           | 47 +++----------------
>  drivers/gpu/drm/i915/intel_lrc.h           |  2 -
>  drivers/gpu/drm/i915/intel_ringbuffer.c    | 72 +++++++-----------------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h    |  7 ---
>  8 files changed, 37 insertions(+), 121 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 3336a5fcd029..beece8feb8fe 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -568,7 +568,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
>  	 * itlb_before_ctx_switch.
>  	 */
>  	if (IS_GEN6(dev_priv)) {
> -		ret = engine->flush(req, I915_GEM_GPU_DOMAINS, 0);
> +		ret = engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
>  		if (ret)
>  			return ret;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index d0ef675fb169..35c4c595e5ba 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -998,10 +998,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>  	if (flush_domains & I915_GEM_DOMAIN_GTT)
>  		wmb();
>  
> -	/* Unconditionally invalidate gpu caches and ensure that we do flush
> -	 * any residual writes from the previous batch.
> -	 */
> -	return intel_engine_invalidate_all_caches(req);
> +	/* Unconditionally invalidate GPU caches and TLBs. */
> +	return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
>  }
>  
>  static bool
> @@ -1163,9 +1161,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>  static void
>  i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
>  {
> -	/* Unconditionally force add_request to emit a full flush. */
> -	params->engine->gpu_caches_dirty = true;
> -
>  	/* Add a breadcrumb for the completion of the batch buffer */
>  	__i915_add_request(params->request, params->batch_obj, true);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index ebfa0406a6a1..39fa9eb10514 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1666,7 +1666,8 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	int ret;
>  
>  	/* NB: TLBs must be flushed and invalidated before a switch */
> -	ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> +	ret = engine->emit_flush(req,
> +				 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
>  	if (ret)
>  		return ret;
>  
> @@ -1693,7 +1694,8 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	int ret;
>  
>  	/* NB: TLBs must be flushed and invalidated before a switch */
> -	ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> +	ret = engine->emit_flush(req,
> +				 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
>  	if (ret)
>  		return ret;
>  
> @@ -1711,8 +1713,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  
>  	/* XXX: RCS is the only one to auto invalidate the TLBs? */
>  	if (engine->id != RCS) {
> -		ret = engine->flush(req,
> -				    I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> +		ret = engine->emit_flush(req,
> +					 I915_GEM_GPU_DOMAINS,
> +					 I915_GEM_GPU_DOMAINS);
>  		if (ret)
>  			return ret;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 942b5b1f1602..7e3206051ced 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -451,12 +451,10 @@ void __i915_add_request(struct drm_i915_gem_request *request,
>  	 * what.
>  	 */
>  	if (flush_caches) {
> -		if (i915.enable_execlists)
> -			ret = logical_ring_flush_all_caches(request);
> -		else
> -			ret = intel_engine_flush_all_caches(request);
> +		ret = engine->emit_flush(request, 0, I915_GEM_GPU_DOMAINS);
> +
>  		/* Not allowed to fail! */
> -		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
> +		WARN(ret, "engine->emit_flush() failed: %d!\n", ret);
>  	}
>  
>  	trace_i915_gem_request_add(request);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 86b8f41c254d..e8d971e81491 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -642,24 +642,6 @@ static void execlists_context_queue(struct drm_i915_gem_request *request)
>  	spin_unlock_bh(&engine->execlist_lock);
>  }
>  
> -static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
> -{
> -	struct intel_engine_cs *engine = req->engine;
> -	uint32_t flush_domains;
> -	int ret;
> -
> -	flush_domains = 0;
> -	if (engine->gpu_caches_dirty)
> -		flush_domains = I915_GEM_GPU_DOMAINS;
> -
> -	ret = engine->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
> -	if (ret)
> -		return ret;
> -
> -	engine->gpu_caches_dirty = false;
> -	return 0;
> -}
> -
>  static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
>  				 struct list_head *vmas)
>  {
> @@ -690,7 +672,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
>  	/* Unconditionally invalidate gpu caches and ensure that we do flush
>  	 * any residual writes from the previous batch.
>  	 */
> -	return logical_ring_invalidate_all_caches(req);
> +	return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
>  }
>  
>  int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
> @@ -930,22 +912,6 @@ void intel_logical_ring_stop(struct intel_engine_cs *engine)
>  	I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING));
>  }
>  
> -int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
> -{
> -	struct intel_engine_cs *engine = req->engine;
> -	int ret;
> -
> -	if (!engine->gpu_caches_dirty)
> -		return 0;
> -
> -	ret = engine->emit_flush(req, 0, I915_GEM_GPU_DOMAINS);
> -	if (ret)
> -		return ret;
> -
> -	engine->gpu_caches_dirty = false;
> -	return 0;
> -}
> -
>  static int intel_lr_context_pin(struct i915_gem_context *ctx,
>  				struct intel_engine_cs *engine)
>  {
> @@ -1026,15 +992,15 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
>  static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
>  {
>  	int ret, i;
> -	struct intel_engine_cs *engine = req->engine;
>  	struct intel_ring *ring = req->ring;
>  	struct i915_workarounds *w = &req->i915->workarounds;
>  
>  	if (w->count == 0)
>  		return 0;
>  
> -	engine->gpu_caches_dirty = true;
> -	ret = logical_ring_flush_all_caches(req);
> +	ret = req->engine->emit_flush(req,
> +				      I915_GEM_GPU_DOMAINS,
> +				      I915_GEM_GPU_DOMAINS);
>  	if (ret)
>  		return ret;
>  
> @@ -1051,8 +1017,9 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
>  
>  	intel_ring_advance(ring);
>  
> -	engine->gpu_caches_dirty = true;
> -	ret = logical_ring_flush_all_caches(req);
> +	ret = req->engine->emit_flush(req,
> +				      I915_GEM_GPU_DOMAINS,
> +				      I915_GEM_GPU_DOMAINS);
>  	if (ret)
>  		return ret;
>  
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index d26fb44549e5..33e0193e5451 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -72,8 +72,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
>  
>  int intel_engines_init(struct drm_device *dev);
>  
> -int logical_ring_flush_all_caches(struct drm_i915_gem_request *req);
> -
>  /* Logical Ring Contexts */
>  
>  /* One extra page is added before LRC for GuC as shared data */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index e7a7f67ab06d..9e4b49644553 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -688,8 +688,9 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
>  	if (w->count == 0)
>  		return 0;
>  
> -	req->engine->gpu_caches_dirty = true;
> -	ret = intel_engine_flush_all_caches(req);
> +	ret = req->engine->emit_flush(req,
> +				      I915_GEM_GPU_DOMAINS,
> +				      I915_GEM_GPU_DOMAINS);
>  	if (ret)
>  		return ret;
>  
> @@ -706,8 +707,9 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
>  
>  	intel_ring_advance(ring);
>  
> -	req->engine->gpu_caches_dirty = true;
> -	ret = intel_engine_flush_all_caches(req);
> +	ret = req->engine->emit_flush(req,
> +				      I915_GEM_GPU_DOMAINS,
> +				      I915_GEM_GPU_DOMAINS);
>  	if (ret)
>  		return ret;
>  
> @@ -2860,21 +2862,21 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
>  	if (INTEL_GEN(dev_priv) >= 8) {
>  		engine->init_context = intel_rcs_ctx_init;
>  		engine->add_request = gen8_render_add_request;
> -		engine->flush = gen8_render_ring_flush;
> +		engine->emit_flush = gen8_render_ring_flush;
>  		if (i915.semaphores)
>  			engine->semaphore.signal = gen8_rcs_signal;
>  	} else if (INTEL_GEN(dev_priv) >= 6) {
>  		engine->init_context = intel_rcs_ctx_init;
> -		engine->flush = gen7_render_ring_flush;
> +		engine->emit_flush = gen7_render_ring_flush;
>  		if (IS_GEN6(dev_priv))
> -			engine->flush = gen6_render_ring_flush;
> +			engine->emit_flush = gen6_render_ring_flush;
>  	} else if (IS_GEN5(dev_priv)) {
> -		engine->flush = gen4_render_ring_flush;
> +		engine->emit_flush = gen4_render_ring_flush;
>  	} else {
>  		if (INTEL_GEN(dev_priv) < 4)
> -			engine->flush = gen2_render_ring_flush;
> +			engine->emit_flush = gen2_render_ring_flush;
>  		else
> -			engine->flush = gen4_render_ring_flush;
> +			engine->emit_flush = gen4_render_ring_flush;
>  		engine->irq_enable_mask = I915_USER_INTERRUPT;
>  	}
>  
> @@ -2911,12 +2913,12 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
>  		/* gen6 bsd needs a special wa for tail updates */
>  		if (IS_GEN6(dev_priv))
>  			engine->write_tail = gen6_bsd_ring_write_tail;
> -		engine->flush = gen6_bsd_ring_flush;
> +		engine->emit_flush = gen6_bsd_ring_flush;
>  		if (INTEL_GEN(dev_priv) < 8)
>  			engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
>  	} else {
>  		engine->mmio_base = BSD_RING_BASE;
> -		engine->flush = bsd_ring_flush;
> +		engine->emit_flush = bsd_ring_flush;
>  		if (IS_GEN5(dev_priv))
>  			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
>  		else
> @@ -2935,7 +2937,7 @@ int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine)
>  
>  	intel_ring_default_vfuncs(dev_priv, engine);
>  
> -	engine->flush = gen6_bsd_ring_flush;
> +	engine->emit_flush = gen6_bsd_ring_flush;
>  
>  	return intel_init_ring_buffer(engine);
>  }
> @@ -2946,7 +2948,7 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
>  
>  	intel_ring_default_vfuncs(dev_priv, engine);
>  
> -	engine->flush = gen6_ring_flush;
> +	engine->emit_flush = gen6_ring_flush;
>  	if (INTEL_GEN(dev_priv) < 8)
>  		engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
>  
> @@ -2959,7 +2961,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
>  
>  	intel_ring_default_vfuncs(dev_priv, engine);
>  
> -	engine->flush = gen6_ring_flush;
> +	engine->emit_flush = gen6_ring_flush;
>  
>  	if (INTEL_GEN(dev_priv) < 8) {
>  		engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
> @@ -2970,46 +2972,6 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
>  	return intel_init_ring_buffer(engine);
>  }
>  
> -int
> -intel_engine_flush_all_caches(struct drm_i915_gem_request *req)
> -{
> -	struct intel_engine_cs *engine = req->engine;
> -	int ret;
> -
> -	if (!engine->gpu_caches_dirty)
> -		return 0;
> -
> -	ret = engine->flush(req, 0, I915_GEM_GPU_DOMAINS);
> -	if (ret)
> -		return ret;
> -
> -	trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
> -
> -	engine->gpu_caches_dirty = false;
> -	return 0;
> -}
> -
> -int
> -intel_engine_invalidate_all_caches(struct drm_i915_gem_request *req)
> -{
> -	struct intel_engine_cs *engine = req->engine;
> -	uint32_t flush_domains;
> -	int ret;
> -
> -	flush_domains = 0;
> -	if (engine->gpu_caches_dirty)
> -		flush_domains = I915_GEM_GPU_DOMAINS;
> -
> -	ret = engine->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
> -	if (ret)
> -		return ret;
> -
> -	trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
> -
> -	engine->gpu_caches_dirty = false;
> -	return 0;
> -}
> -
>  void intel_engine_stop(struct intel_engine_cs *engine)
>  {
>  	int ret;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index ba54ffcdd55a..00723401f98c 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -206,9 +206,6 @@ struct intel_engine_cs {
>  
>  	void		(*write_tail)(struct intel_engine_cs *engine,
>  				      u32 value);
> -	int __must_check (*flush)(struct drm_i915_gem_request *req,
> -				  u32	invalidate_domains,
> -				  u32	flush_domains);
>  	int		(*add_request)(struct drm_i915_gem_request *req);
>  	/* Some chipsets are not quite as coherent as advertised and need
>  	 * an expensive kick to force a true read of the up-to-date seqno.
> @@ -325,8 +322,6 @@ struct intel_engine_cs {
>  	 */
>  	u32 last_submitted_seqno;
>  
> -	bool gpu_caches_dirty;
> -
>  	struct i915_gem_context *last_context;
>  
>  	struct intel_engine_hangcheck hangcheck;
> @@ -474,8 +469,6 @@ void intel_ring_update_space(struct intel_ring *ring);
>  
>  int __must_check intel_engine_idle(struct intel_engine_cs *engine);
>  void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
> -int intel_engine_flush_all_caches(struct drm_i915_gem_request *req);
> -int intel_engine_invalidate_all_caches(struct drm_i915_gem_request *req);
>  
>  int intel_init_pipe_control(struct intel_engine_cs *engine, int size);
>  void intel_fini_pipe_control(struct intel_engine_cs *engine);
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux