Re: [PATCH 28/53] drm/i915/bdw: GEN-specific logical ring emit flush

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Jun 13, 2014 at 08:37:46AM -0700, oscar.mateo@xxxxxxxxx wrote:
> From: Oscar Mateo <oscar.mateo@xxxxxxxxx>
> 
> Notice that the BSD invalidate bit is no longer present in GEN8, so

Hmm. As far as I can tell, the BSD invalidate bit is still present for VCS
on gen8. Whether we actually need to set it, I don't know.

> we can consolidate the blt and bsd ring flushes into one.
> 
> Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c        | 80 +++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_ringbuffer.c |  7 ---
>  drivers/gpu/drm/i915/intel_ringbuffer.h | 11 +++++
>  3 files changed, 91 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 3debe8b..3d7fcd6 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -343,6 +343,81 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring)
>  	return ret;
>  }
>  
> +static int gen8_emit_flush(struct intel_engine_cs *ring,
> +			   struct intel_context *ctx,
> +			   u32 invalidate_domains,
> +			   u32 unused)
> +{
> +	struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx);
> +	uint32_t cmd;
> +	int ret;
> +
> +	ret = intel_logical_ring_begin(ring, ctx, 4);
> +	if (ret)
> +		return ret;
> +
> +	cmd = MI_FLUSH_DW + 1;
> +
> +	/*
> +	 * Bspec vol 1c.3 - blitter engine command streamer:
> +	 * "If ENABLED, all TLBs will be invalidated once the flush
> +	 * operation is complete. This bit is only valid when the
> +	 * Post-Sync Operation field is a value of 1h or 3h."
> +	 */
> +	if (invalidate_domains & I915_GEM_DOMAIN_RENDER)
> +		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
> +			MI_FLUSH_DW_OP_STOREDW;
> +	intel_logical_ring_emit(ringbuf, cmd);
> +	intel_logical_ring_emit(ringbuf, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
> +	intel_logical_ring_emit(ringbuf, 0); /* upper addr */
> +	intel_logical_ring_emit(ringbuf, 0); /* value */
> +	intel_logical_ring_advance(ringbuf);
> +
> +	return 0;
> +}
> +
> +static int gen8_emit_flush_render(struct intel_engine_cs *ring,
> +				  struct intel_context *ctx,
> +				  u32 invalidate_domains,
> +				  u32 flush_domains)
> +{
> +	struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx);
> +	u32 flags = 0;
> +	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
> +	int ret;
> +
> +	flags |= PIPE_CONTROL_CS_STALL;
> +
> +	if (flush_domains) {
> +		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
> +		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
> +	}
> +	if (invalidate_domains) {
> +		flags |= PIPE_CONTROL_TLB_INVALIDATE;
> +		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
> +		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
> +		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
> +		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
> +		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
> +		flags |= PIPE_CONTROL_QW_WRITE;
> +		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
> +	}
> +
> +	ret = intel_logical_ring_begin(ring, ctx, 6);
> +	if (ret)
> +		return ret;
> +
> +	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
> +	intel_logical_ring_emit(ringbuf, flags);
> +	intel_logical_ring_emit(ringbuf, scratch_addr);
> +	intel_logical_ring_emit(ringbuf, 0);
> +	intel_logical_ring_emit(ringbuf, 0);
> +	intel_logical_ring_emit(ringbuf, 0);
> +	intel_logical_ring_advance(ringbuf);
> +
> +	return 0;
> +}
> +
>  static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
>  {
>  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> @@ -491,6 +566,7 @@ static int logical_render_ring_init(struct drm_device *dev)
>  	ring->set_seqno = gen8_set_seqno;
>  	ring->submit_ctx = gen8_submit_ctx;
>  	ring->emit_request = gen8_emit_request_render;
> +	ring->emit_flush = gen8_emit_flush_render;
>  
>  	return logical_ring_init(dev, ring);
>  }
> @@ -511,6 +587,7 @@ static int logical_bsd_ring_init(struct drm_device *dev)
>  	ring->set_seqno = gen8_set_seqno;
>  	ring->submit_ctx = gen8_submit_ctx;
>  	ring->emit_request = gen8_emit_request;
> +	ring->emit_flush = gen8_emit_flush;
>  
>  	return logical_ring_init(dev, ring);
>  }
> @@ -531,6 +608,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
>  	ring->set_seqno = gen8_set_seqno;
>  	ring->submit_ctx = gen8_submit_ctx;
>  	ring->emit_request = gen8_emit_request;
> +	ring->emit_flush = gen8_emit_flush;
>  
>  	return logical_ring_init(dev, ring);
>  }
> @@ -551,6 +629,7 @@ static int logical_blt_ring_init(struct drm_device *dev)
>  	ring->set_seqno = gen8_set_seqno;
>  	ring->submit_ctx = gen8_submit_ctx;
>  	ring->emit_request = gen8_emit_request;
> +	ring->emit_flush = gen8_emit_flush;
>  
>  	return logical_ring_init(dev, ring);
>  }
> @@ -571,6 +650,7 @@ static int logical_vebox_ring_init(struct drm_device *dev)
>  	ring->set_seqno = gen8_set_seqno;
>  	ring->submit_ctx = gen8_submit_ctx;
>  	ring->emit_request = gen8_emit_request;
> +	ring->emit_flush = gen8_emit_flush;
>  
>  	return logical_ring_init(dev, ring);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 137ee9a..a128f6f 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -33,13 +33,6 @@
>  #include "i915_trace.h"
>  #include "intel_drv.h"
>  
> -/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
> - * but keeps the logic simple. Indeed, the whole purpose of this macro is just
> - * to give some inclination as to some of the magic values used in the various
> - * workarounds!
> - */
> -#define CACHELINE_BYTES 64
> -
>  bool
>  intel_ring_initialized(struct intel_engine_cs *ring)
>  {
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index d8ded14..527db2a 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -5,6 +5,13 @@
>  
>  #define I915_CMD_HASH_ORDER 9
>  
> +/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
> + * but keeps the logic simple. Indeed, the whole purpose of this macro is just
> + * to give some inclination as to some of the magic values used in the various
> + * workarounds!
> + */
> +#define CACHELINE_BYTES 64
> +
>  /*
>   * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
>   * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
> @@ -153,6 +160,10 @@ struct  intel_engine_cs {
>  				      struct intel_context *ctx, u32 value);
>  	int		(*emit_request)(struct intel_engine_cs *ring,
>  					struct intel_context *ctx);
> +	int __must_check (*emit_flush)(struct intel_engine_cs *ring,
> +				       struct intel_context *ctx,
> +				       u32 invalidate_domains,
> +				       u32 flush_domains);

Any reason to make this callback __must_check but not the other ones in
this struct (e.g. emit_request)?

Brad

>  
>  	/**
>  	 * List of objects currently involved in rendering from the
> -- 
> 1.9.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux