[PATCH] drm/amdgpu/gfx6: flush caches after IB with the correct vmid

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Nicolai Hähnle <nicolai.haehnle@xxxxxxx>

Bring the code in line with what the radeon module does.

Without this change, the fence following the IB may be signalled
to the CPU even though some data written by shaders may not have
been written back yet.

This change fixes the OpenGL CTS test
GL45-CTS.gtf32.GL3Tests.packed_pixels.packed_pixels_pbo on Verde.

Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
--
So, I'm not too familiar with the details of these syncs, but the radeon
module effectively does:

- IB
- SURFACE_SYNC (vm_id of IB)
- SURFACE_SYNC (vm_id == 0)
- EVENT_WRITE_EOP (kernel fence)

While amdgpu now does (with this change):

- IB
- SURFACE_SYNC (vm_id of IB) <-- this was missing
- SURFACE_SYNC (vm_id == 0)
- EVENT_WRITE_EOP (kernel fence)
- SURFACE_SYNC (vm_id == 0)
- EVENT_WRITE_EOP (user fence)

It seems like at least the second SURFACE_SYNC (vm_id == 0) should be
redundant, so the question is whether the SURFACE_SYNC (vm_id == 0)
should be rearranged somehow, perhaps also added to the IB emission.
But for better bisectability, that should probably be a separate
change.

Cheers,
Nicolai
---
 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 03d2a0a..c4f444d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1922,20 +1922,35 @@ static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
 	control |= ib->length_dw | (vm_id << 24);
 
 	amdgpu_ring_write(ring, header);
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
 			  (2 << 0) |
 #endif
 			  (ib->gpu_addr & 0xFFFFFFFC));
 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
 	amdgpu_ring_write(ring, control);
+
+	if (!(ib->flags & AMDGPU_IB_FLAG_CE)) {
+		/* flush read cache over gart for this vmid */
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		amdgpu_ring_write(ring, (mmCP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START));
+		amdgpu_ring_write(ring, vm_id);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+		amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+				  PACKET3_TC_ACTION_ENA |
+				  PACKET3_SH_KCACHE_ACTION_ENA |
+				  PACKET3_SH_ICACHE_ACTION_ENA);
+		amdgpu_ring_write(ring, 0xFFFFFFFF);
+		amdgpu_ring_write(ring, 0);
+		amdgpu_ring_write(ring, 10); /* poll interval */
+	}
 }
 
 /**
  * gfx_v6_0_ring_test_ib - basic ring IB test
  *
  * @ring: amdgpu_ring structure holding ring information
  *
  * Allocate an IB and execute it on the gfx ring (SI).
  * Provides a basic gfx ring test to verify that IBs are working.
  * Returns 0 on success, error on failure.
@@ -3631,21 +3646,21 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
 	.get_rptr = gfx_v6_0_ring_get_rptr,
 	.get_wptr = gfx_v6_0_ring_get_wptr,
 	.set_wptr = gfx_v6_0_ring_set_wptr_gfx,
 	.emit_frame_size =
 		5 + /* gfx_v6_0_ring_emit_hdp_flush */
 		5 + /* gfx_v6_0_ring_emit_hdp_invalidate */
 		14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
 		7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */
 		17 + 6 + /* gfx_v6_0_ring_emit_vm_flush */
 		3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */
-	.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
+	.emit_ib_size = 6 + 8, /* gfx_v6_0_ring_emit_ib */
 	.emit_ib = gfx_v6_0_ring_emit_ib,
 	.emit_fence = gfx_v6_0_ring_emit_fence,
 	.emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
 	.emit_hdp_flush = gfx_v6_0_ring_emit_hdp_flush,
 	.emit_hdp_invalidate = gfx_v6_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v6_0_ring_test_ring,
 	.test_ib = gfx_v6_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.emit_cntxcntl = gfx_v6_ring_emit_cntxcntl,
@@ -3657,21 +3672,21 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
 	.nop = 0x80000000,
 	.get_rptr = gfx_v6_0_ring_get_rptr,
 	.get_wptr = gfx_v6_0_ring_get_wptr,
 	.set_wptr = gfx_v6_0_ring_set_wptr_compute,
 	.emit_frame_size =
 		5 + /* gfx_v6_0_ring_emit_hdp_flush */
 		5 + /* gfx_v6_0_ring_emit_hdp_invalidate */
 		7 + /* gfx_v6_0_ring_emit_pipeline_sync */
 		17 + /* gfx_v6_0_ring_emit_vm_flush */
 		14 + 14 + 14, /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
-	.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
+	.emit_ib_size = 6 + 8, /* gfx_v6_0_ring_emit_ib */
 	.emit_ib = gfx_v6_0_ring_emit_ib,
 	.emit_fence = gfx_v6_0_ring_emit_fence,
 	.emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
 	.emit_hdp_flush = gfx_v6_0_ring_emit_hdp_flush,
 	.emit_hdp_invalidate = gfx_v6_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v6_0_ring_test_ring,
 	.test_ib = gfx_v6_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 };
-- 
2.9.3



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux