Re: [PATCH] drm/amdgpu: add support of burst nop for gfx10

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 30.07.24 um 07:21 schrieb Sunil Khatri:
Problem:
Until now we have been adding NOP packets one by one,
i.e. if we need N NOP packets for padding, we add
N individual NOP packets to the ring, which does not use the HW
efficiently.

Solution:
Use the data block of the NOP packet to hold the NOP packets,
up to the maximum number of NOPs the HW supports. With this, the HW
skips passing the information to the CP and skips over
N packets, treating them as NOP packets.

Signed-off-by: Sunil Khatri <sunil.khatri@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 22 +++++++++++++++++++---
  1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 853084a2ce7f..01f98e2401ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -9397,6 +9397,22 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
  	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
  }
+static void amdgpu_gfx10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	int i;
+
+	if (count == 0 || count > 0x3fff) {
+		DRM_ERROR("Invalid NOP's pkt count\n");
+		return;
+	}

Please drop that, we should not have parameter validation in the backend. That's the job of the frontend and middleware.

+
+	for (i = 0; i < count; i++)
+		if (count == 1)

That looks incorrect to me. You should probably test (i == 0) here or, even better, move that outside of the loop.

Regards,
Christian.

+			amdgpu_ring_write(ring, ring->funcs->nop | PACKET3(PACKET3_NOP, count - 1));
+		else
+			amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
  static void gfx_v10_ip_print(void *handle, struct drm_printer *p)
  {
  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -9588,7 +9604,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
  	.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
  	.test_ring = gfx_v10_0_ring_test_ring,
  	.test_ib = gfx_v10_0_ring_test_ib,
-	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_nop = amdgpu_gfx10_ring_insert_nop,
  	.pad_ib = amdgpu_ring_generic_pad_ib,
  	.emit_switch_buffer = gfx_v10_0_ring_emit_sb,
  	.emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
@@ -9629,7 +9645,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
  	.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
  	.test_ring = gfx_v10_0_ring_test_ring,
  	.test_ib = gfx_v10_0_ring_test_ib,
-	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_nop = amdgpu_gfx10_ring_insert_nop,
  	.pad_ib = amdgpu_ring_generic_pad_ib,
  	.emit_wreg = gfx_v10_0_ring_emit_wreg,
  	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
@@ -9659,7 +9675,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
  	.emit_fence = gfx_v10_0_ring_emit_fence_kiq,
  	.test_ring = gfx_v10_0_ring_test_ring,
  	.test_ib = gfx_v10_0_ring_test_ib,
-	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_nop = amdgpu_gfx10_ring_insert_nop,
  	.pad_ib = amdgpu_ring_generic_pad_ib,
  	.emit_rreg = gfx_v10_0_ring_emit_rreg,
  	.emit_wreg = gfx_v10_0_ring_emit_wreg,




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux