The GRBM interface is now capable of bursting a 1-cycle op per register, a WRITE followed by another WRITE, or a WRITE followed by a READ--much faster than the previous multi-cycle per completed-transaction interface. This causes a problem, whereby status registers that require a read/write by hardware have a 1-cycle delay due to the register update having to go through the GRBM interface. This patch adds this delay. A one-cycle read op is added after updating the invalidate request and before reading the invalidate-ACK status. See also commit 534991731cb5fa94b5519957646cf849ca10d17d. v2: Remove GFX9 and apply only to SDMA ring. Signed-off-by: Luben Tuikov <luben.tuikov@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 6e1b25bd1fe7..dedd7e1ab2fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -346,6 +346,13 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); + /* Insert a dummy read to delay one cycle after the write REQ, + * and before the ACK inquiry. 
+ */ + if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA) + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_req + eng, 0, 0); + /* wait for the invalidate to complete */ amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, 1 << vmid, 1 << vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index b8fdb192f6d6..0c41b4fdc58b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1588,7 +1588,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { 6 + /* sdma_v5_0_ring_emit_pipeline_sync */ /* sdma_v5_0_ring_emit_vm_flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 + 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ .emit_ib = sdma_v5_0_ring_emit_ib, -- 2.23.0.385.gbc12974a89 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx