Re: [PATCH 021/190] drm/i915: Use HWS for seqno tracking everywhere

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 11/01/16 09:16, Chris Wilson wrote:
By using the same address for storing the HWS on every platform, we can
remove the platform specific vfuncs and reduce the get-seqno routine to
a single read of a cached memory location.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_debugfs.c      | 10 ++--
  drivers/gpu/drm/i915/i915_drv.h          |  4 +-
  drivers/gpu/drm/i915/i915_gpu_error.c    |  2 +-
  drivers/gpu/drm/i915/i915_irq.c          |  4 +-
  drivers/gpu/drm/i915/i915_trace.h        |  2 +-
  drivers/gpu/drm/i915/intel_breadcrumbs.c |  4 +-
  drivers/gpu/drm/i915/intel_lrc.c         | 46 ++---------------
  drivers/gpu/drm/i915/intel_ringbuffer.c  | 86 ++++++++------------------------
  drivers/gpu/drm/i915/intel_ringbuffer.h  |  7 +--
  9 files changed, 43 insertions(+), 122 deletions(-)

Generally I like this, except the changes to pc_render_add_request() (as previously noted) ...

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 57ec21c5b1ab..c86d0e17d785 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1216,19 +1216,17 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
  		return ret;

  	for_each_ring(waiter, dev_priv, i) {
-		u32 seqno;
  		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
  		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
  			continue;

-		seqno = i915_gem_request_get_seqno(signaller_req);
  		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
  		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
  					   PIPE_CONTROL_QW_WRITE |
  					   PIPE_CONTROL_FLUSH_ENABLE);
  		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
  		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, seqno);
+		intel_ring_emit(signaller, signaller_req->seqno);
  		intel_ring_emit(signaller, 0);
  		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
  					   MI_SEMAPHORE_TARGET(waiter->id));
@@ -1257,18 +1255,16 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
  		return ret;

  	for_each_ring(waiter, dev_priv, i) {
-		u32 seqno;
  		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
  		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
  			continue;

-		seqno = i915_gem_request_get_seqno(signaller_req);
  		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
  					   MI_FLUSH_DW_OP_STOREDW);
  		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
  					   MI_FLUSH_DW_USE_GTT);
  		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, seqno);
+		intel_ring_emit(signaller, signaller_req->seqno);
  		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
  					   MI_SEMAPHORE_TARGET(waiter->id));
  		intel_ring_emit(signaller, 0);
@@ -1299,11 +1295,9 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req,
  		i915_reg_t mbox_reg = signaller->semaphore.mbox.signal[i];

  		if (i915_mmio_reg_valid(mbox_reg)) {
-			u32 seqno = i915_gem_request_get_seqno(signaller_req);
-
  			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
  			intel_ring_emit_reg(signaller, mbox_reg);
-			intel_ring_emit(signaller, seqno);
+			intel_ring_emit(signaller, signaller_req->seqno);
  		}
  	}

@@ -1338,7 +1332,7 @@ gen6_add_request(struct drm_i915_gem_request *req)

  	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
  	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
+	intel_ring_emit(ring, req->seqno);
  	intel_ring_emit(ring, MI_USER_INTERRUPT);
  	__intel_ring_advance(ring);

@@ -1440,7 +1434,9 @@ static int
  pc_render_add_request(struct drm_i915_gem_request *req)
  {
  	struct intel_engine_cs *ring = req->ring;
-	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	u32 addr = req->ring->status_page.gfx_addr +
+		(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
+	u32 scratch_addr = addr;
  	int ret;

What I don't like is that this will now splat dummy writes across the HWSP. I'd rather the HWSP was kept for communicating /useful/ messages from the GPU to CPU, and the scratch page be used for dummy writes (as it previously was).

If you really needed to switch those writes from scratch to HWSP -- maybe so that the scratch page could be eliminated -- you could #define six separate offsets within the status page to be used for dummy writes, and use those values in the emit() sequence below. But that really wouldn't be pretty :( And since AFAIK we're not planning to get rid of the scratch page, we should continue to use it for /all/ dummy writes.

.Dave.

  	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
@@ -1455,11 +1451,12 @@ pc_render_add_request(struct drm_i915_gem_request *req)
  	if (ret)
  		return ret;

-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
-			PIPE_CONTROL_WRITE_FLUSH |
-			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
-	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
+	intel_ring_emit(ring,
+			GFX_OP_PIPE_CONTROL(4) |
+			PIPE_CONTROL_QW_WRITE |
+			PIPE_CONTROL_WRITE_FLUSH);
+	intel_ring_emit(ring, addr | PIPE_CONTROL_GLOBAL_GTT);
+	intel_ring_emit(ring, req->seqno);
  	intel_ring_emit(ring, 0);
  	PIPE_CONTROL_FLUSH(ring, scratch_addr);
  	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
@@ -1473,12 +1470,12 @@ pc_render_add_request(struct drm_i915_gem_request *req)
  	scratch_addr += 2 * CACHELINE_BYTES;
  	PIPE_CONTROL_FLUSH(ring, scratch_addr);

-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) |
+			PIPE_CONTROL_QW_WRITE |
  			PIPE_CONTROL_WRITE_FLUSH |
-			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
  			PIPE_CONTROL_NOTIFY);
-	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
+	intel_ring_emit(ring, addr | PIPE_CONTROL_GLOBAL_GTT);
+	intel_ring_emit(ring, req->seqno);
  	intel_ring_emit(ring, 0);
  	__intel_ring_advance(ring);

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux