On 2019-10-26 08:09, Koenig, Christian wrote:
> Am 26.10.19 um 00:45 schrieb Tuikov, Luben:
>> On 2019-10-25 12:19 p.m., Koenig, Christian wrote:
>>> Am 25.10.19 um 18:05 schrieb Alex Deucher:
>>>> On Fri, Oct 25, 2019 at 2:49 AM Koenig, Christian
>>>> <Christian.Koenig@xxxxxxx> wrote:
>>>>> Am 24.10.19 um 23:16 schrieb Tuikov, Luben:
>>>>>> The GRBM interface is now capable of bursting
>>>>>> 1-cycle op per register, a WRITE followed by
>>>>>> another WRITE, or a WRITE followed by a READ--much
>>>>>> faster than the previous multi-cycle per
>>>>>> completed-transaction interface. This causes a
>>>>>> problem, whereby status registers requiring a
>>>>>> read/write by hardware have a 1-cycle delay, due
>>>>>> to the register update having to go through the
>>>>>> GRBM interface.
>>>>>>
>>>>>> This patch adds this delay.
>>>>>>
>>>>>> A one-cycle read op is added after updating the
>>>>>> invalidate request and before reading the
>>>>>> invalidate-ACK status.
>>>>> Please completely drop all changes for GFX9 since this patch will most
>>>>> likely break SRIOV.
>>>>>
>>>>> Additional to that please apply the workaround only to SDMA since the CP
>>>>> driven engines should handle that in firmware.
>> Thank you Christian for reviewing this patch.
>>
>> This patch stirred quite a bit of noise. So, then, I'll go by
>> your last comment above--I suppose this is the desired way to go forward then?
>
> You most likely broke the SRIOV use case on GFX9 with that, no wonder
> that this raised eyebrows.
>
> As far as I can see this manual workaround is only applicable to the
> SDMA on Navi.

Did you see the (v2) patch?

Regards,
Luben

>
> But we should double check that the CP firmware interface with the
> combined write/wait command is correctly used on Navi/GFX10 as well.
> IIRC that came in rather late for GFX9, could be that the Navi bringup
> branch never had that.
>
> Regards,
> Christian.
>
>>
>> Regards,
>> Luben
>>
>>
>>>> I think the CP only handles this in firmware if we use the new TLB
>>>> invalidation packet. I don't think it applies it to general register
>>>> writes like we do.
>>> No, on the CP we should use the combined write/wait command even if we
>>> don't use the new specialized VM invalidate command. Everything else
>>> won't work with SRIOV.
>>>
>>> Even if we want to we can't insert an extra read in this combined
>>> write/wait command. And if we split up the commands we would break SRIOV
>>> once more.
>>>
>>> So applying this workaround to the CP code doesn't make any sense at all.
>>>
>>> The only TODO which I can see is that we maybe don't use the combined
>>> write/wait command on Navi yet.
>>>
>>> Christian.
>>>
>>>> Alex
>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>> See also commit
>>>>>> 534991731cb5fa94b5519957646cf849ca10d17d.
>>>>>>
>>>>>> Signed-off-by: Luben Tuikov <luben.tuikov@xxxxxxx>
>>>>>> ---
>>>>>>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++--
>>>>>>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 4 ++--
>>>>>>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 9 +++++++++
>>>>>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 ++++++++
>>>>>>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +-
>>>>>>   5 files changed, 22 insertions(+), 5 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>>>>> index ac43b1af69e3..0042868dbd53 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>>>>> @@ -5129,7 +5129,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>>>>>>          5 + /* COND_EXEC */
>>>>>>          7 + /* PIPELINE_SYNC */
>>>>>>          SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>>>>>> -        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>>>>>> +        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>>>>>>          2 + /* VM_FLUSH */
>>>>>>          8 + /* FENCE for VM_FLUSH */
>>>>>>          20 + /* GDS switch */
>>>>>> @@ -5182,7 +5182,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>>>>>>          5 + /* hdp invalidate */
>>>>>>          7 + /* gfx_v10_0_ring_emit_pipeline_sync */
>>>>>>          SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>>>>>> -        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>>>>>> +        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>>>>>>          2 + /* gfx_v10_0_ring_emit_vm_flush */
>>>>>>          8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
>>>>>>      .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>>>>> index 9fe95e7693d5..9a7a717208de 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>>>>> @@ -6218,7 +6218,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>>>>>>          5 + /* COND_EXEC */
>>>>>>          7 + /* PIPELINE_SYNC */
>>>>>>          SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>>>>>> -        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>>>>>> +        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>>>>>>          2 + /* VM_FLUSH */
>>>>>>          8 + /* FENCE for VM_FLUSH */
>>>>>>          20 + /* GDS switch */
>>>>>> @@ -6271,7 +6271,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>>>>>>          5 + /* hdp invalidate */
>>>>>>          7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>>>>>>          SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>>>>>> -        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>>>>>> +        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
>>>>>>          2 + /* gfx_v9_0_ring_emit_vm_flush */
>>>>>>          8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
>>>>>>      .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>>>>> index 6e1b25bd1fe7..100d526e9a42 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>>>>> @@ -346,6 +346,15 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>>>>>>
>>>>>>      amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
>>>>>>
>>>>>> +    /* Insert a dummy read to delay one cycle before the ACK
>>>>>> +     * inquiry.
>>>>>> +     */
>>>>>> +    if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA ||
>>>>>> +        ring->funcs->type == AMDGPU_RING_TYPE_GFX ||
>>>>>> +        ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
>>>>>> +        amdgpu_ring_emit_reg_wait(ring,
>>>>>> +                                  hub->vm_inv_eng0_req + eng, 0, 0);
>>>>>> +
>>>>>>      /* wait for the invalidate to complete */
>>>>>>      amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
>>>>>>                                1 << vmid, 1 << vmid);
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>>>> index 9f2a893871ec..8f3097e45299 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>>>> @@ -495,6 +495,14 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>>>>>>      amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>>>>>>                            upper_32_bits(pd_addr));
>>>>>>
>>>>>> +    /* Insert a dummy read to delay one cycle before the ACK
>>>>>> +     * inquiry.
>>>>>> +     */
>>>>>> +    if (ring->funcs->type == AMDGPU_RING_TYPE_GFX ||
>>>>>> +        ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
>>>>>> +        amdgpu_ring_emit_reg_wait(ring,
>>>>>> +                                  hub->vm_inv_eng0_req + eng, 0, 0);
>>>>>> +
>>>>>>      amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
>>>>>>                                          hub->vm_inv_eng0_ack + eng,
>>>>>>                                          req, 1 << vmid);
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>>>>> index b8fdb192f6d6..0c41b4fdc58b 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>>>>> @@ -1588,7 +1588,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>>>>>>          6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>>>>>>          /* sdma_v5_0_ring_emit_vm_flush */
>>>>>>          SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
>>>>>> -        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
>>>>>> +        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>>>>>>          10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>>>>>>      .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>>>>>>      .emit_ib = sdma_v5_0_ring_emit_ib,
>>>>> _______________________________________________
>>>>> amd-gfx mailing list
>>>>> amd-gfx@xxxxxxxxxxxxxxxxxxxxx
>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> _______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
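
To make the direction discussed above concrete--emit the dummy read only on
the SDMA ring and keep the combined write/wait packet on CP-driven rings so
SRIOV keeps working--here is a minimal, hypothetical sketch, not the final v2
patch. The names hub, eng, req and vmid follow the quoted
gmc_v10_0_emit_flush_gpu_tlb() context; the exact gating is an assumption.

    if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA) {
        /* SDMA: split write + wait, with a dummy read of the request
         * register in between to cover the 1-cycle GRBM update delay.
         */
        amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);

        /* ref == 0, mask == 0 matches immediately; this is effectively
         * just a read of the register.
         */
        amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_req + eng, 0, 0);

        /* Now poll the ACK bit for this VMID. */
        amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
                                  1 << vmid, 1 << vmid);
    } else {
        /* CP-driven rings: keep the combined write/wait packet so the
         * sequence stays SRIOV-safe; no extra read can be inserted into
         * this packet.
         */
        amdgpu_ring_emit_reg_write_reg_wait(ring,
                                            hub->vm_inv_eng0_req + eng,
                                            hub->vm_inv_eng0_ack + eng,
                                            req, 1 << vmid);
    }

Whether GFX10 already routes the CP rings through the combined packet is the
open TODO raised in the thread above.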