[AMD Official Use Only - AMD Internal Distribution Only] Ignore. Will be pushing new patch set. -----Original Message----- From: Sunil Khatri <sunil.khatri@xxxxxxx> Sent: Tuesday, July 30, 2024 10:52 AM To: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Pelloux-Prayer@xxxxxxxxxxxxxxxxxxxxxxxx; Pelloux-Prayer, Pierre-Eric <Pierre-eric.Pelloux-prayer@xxxxxxx>; Koenig, Christian <Christian.Koenig@xxxxxxx> Cc: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Khatri, Sunil <Sunil.Khatri@xxxxxxx> Subject: [PATCH] drm/amdgpu: add support of burst nop for gfx10 Problem: Until now we have been adding NOP packets one by one, i.e. if we need N NOP packets for padding, we add N NOP packets to the ring, which does not use the HW efficiently. Solution: Use the data block of the NOP packet to cover the padding, up to the maximum number of NOPs the HW supports. With this, the HW skips passing the information to the CP and skips over N packets, treating them as NOP packets. Signed-off-by: Sunil Khatri <sunil.khatri@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 853084a2ce7f..01f98e2401ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9397,6 +9397,22 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static void amdgpu_gfx10_ring_insert_nop(struct amdgpu_ring *ring, +uint32_t count) { + int i; + + if (count == 0 || count > 0x3fff) { + DRM_ERROR("Invalid NOP's pkt count\n"); + return; + } + + for (i = 0; i < count; i++) + if (count == 1) + amdgpu_ring_write(ring, ring->funcs->nop | PACKET3(PACKET3_NOP, count - 1)); + else + amdgpu_ring_write(ring, ring->funcs->nop); } + static void gfx_v10_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -9588,7 +9604,7 @@ static const 
struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, .test_ring = gfx_v10_0_ring_test_ring, .test_ib = gfx_v10_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = amdgpu_gfx10_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v10_0_ring_emit_sb, .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl, @@ -9629,7 +9645,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, .test_ring = gfx_v10_0_ring_test_ring, .test_ib = gfx_v10_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = amdgpu_gfx10_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, @@ -9659,7 +9675,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { .emit_fence = gfx_v10_0_ring_emit_fence_kiq, .test_ring = gfx_v10_0_ring_test_ring, .test_ib = gfx_v10_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = amdgpu_gfx10_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_rreg = gfx_v10_0_ring_emit_rreg, .emit_wreg = gfx_v10_0_ring_emit_wreg, -- 2.34.1