On Fri, Oct 25, 2019 at 2:49 AM Koenig, Christian <Christian.Koenig@xxxxxxx> wrote: > > Am 24.10.19 um 23:16 schrieb Tuikov, Luben: > > The GRBM interface is now capable of bursting > > 1-cycle op per register, a WRITE followed by > > another WRITE, or a WRITE followed by a READ--much > > faster than previous multi-cycle per > > completed-transaction interface. This causes a > > problem, whereby status registers requiring a > > read/write by hardware, have a 1-cycle delay, due > > to the register update having to go through GRBM > > interface. > > > > This patch adds this delay. > > > > A one cycle read op is added after updating the > > invalidate request and before reading the > > invalidate-ACK status. > > Please completely drop all changes for GFX9 since this patch will most > likely break SRIOV. > > In addition to that, please apply the workaround only to SDMA since the CP > driven engines should handle that in firmware. I think the CP only handles this in firmware if we use the new TLB invalidation packet. I don't think it applies it to general register writes like we do. Alex > > Regards, > Christian. > > > > > See also commit > > 534991731cb5fa94b5519957646cf849ca10d17d. 
> > > > Signed-off-by: Luben Tuikov <luben.tuikov@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++-- > > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- > > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 9 +++++++++ > > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 ++++++++ > > drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +- > > 5 files changed, 22 insertions(+), 5 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > index ac43b1af69e3..0042868dbd53 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > @@ -5129,7 +5129,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { > > 5 + /* COND_EXEC */ > > 7 + /* PIPELINE_SYNC */ > > SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + > > - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + > > + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 + > > 2 + /* VM_FLUSH */ > > 8 + /* FENCE for VM_FLUSH */ > > 20 + /* GDS switch */ > > @@ -5182,7 +5182,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { > > 5 + /* hdp invalidate */ > > 7 + /* gfx_v10_0_ring_emit_pipeline_sync */ > > SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + > > - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + > > + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 + > > 2 + /* gfx_v10_0_ring_emit_vm_flush */ > > 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */ > > .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > index 9fe95e7693d5..9a7a717208de 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > @@ -6218,7 +6218,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { > > 5 + /* COND_EXEC */ > > 7 + /* PIPELINE_SYNC */ > > SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + > > - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + > > + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 + > > 2 + /* 
VM_FLUSH */ > > 8 + /* FENCE for VM_FLUSH */ > > 20 + /* GDS switch */ > > @@ -6271,7 +6271,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { > > 5 + /* hdp invalidate */ > > 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ > > SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + > > - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + > > + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 + > > 2 + /* gfx_v9_0_ring_emit_vm_flush */ > > 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ > > .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ > > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > > index 6e1b25bd1fe7..100d526e9a42 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > > @@ -346,6 +346,15 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, > > > > amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); > > > > + /* Insert a dummy read to delay one cycle before the ACK > > + * inquiry. > > + */ > > + if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA || > > + ring->funcs->type == AMDGPU_RING_TYPE_GFX || > > + ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) > > + amdgpu_ring_emit_reg_wait(ring, > > + hub->vm_inv_eng0_req + eng, 0, 0); > > + > > /* wait for the invalidate to complete */ > > amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, > > 1 << vmid, 1 << vmid); > > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > > index 9f2a893871ec..8f3097e45299 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > > @@ -495,6 +495,14 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, > > amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), > > upper_32_bits(pd_addr)); > > > > + /* Insert a dummy read to delay one cycle before the ACK > > + * inquiry. 
> > + */ > > + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX || > > + ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) > > + amdgpu_ring_emit_reg_wait(ring, > > + hub->vm_inv_eng0_req + eng, 0, 0); > > + > > amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, > > hub->vm_inv_eng0_ack + eng, > > req, 1 << vmid); > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > > index b8fdb192f6d6..0c41b4fdc58b 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > > @@ -1588,7 +1588,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { > > 6 + /* sdma_v5_0_ring_emit_pipeline_sync */ > > /* sdma_v5_0_ring_emit_vm_flush */ > > SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + > > - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + > > + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 + > > 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ > > .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ > > .emit_ib = sdma_v5_0_ring_emit_ib, > > _______________________________________________ > amd-gfx mailing list > amd-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx