On Tue, Mar 27, 2018 at 11:43 AM, Christian König <ckoenig.leichtzumerken at gmail.com> wrote: > Am 27.03.2018 um 17:37 schrieb Alex Deucher: >> >> On Tue, Mar 27, 2018 at 11:15 AM, Alex Deucher <alexdeucher at gmail.com> >> wrote: >>> >>> On Tue, Mar 27, 2018 at 1:58 AM, Emily Deng <Emily.Deng at amd.com> wrote: >>>> >>>> issue: >>>> the vmflush in KCQ could be preempted (not like GFX ring >>>> which doesn't allow preemption in ring buffer) and this lead >>>> to vm flush fail when there is a world switch during >>>> the vm flush procedure (between write invalidate request >>>> and query invalidate ack) >>>> >>>> fix: >>>> separate vm flush for gfx and compute ring, and use >>>> the new format command in compute's vm flush which >>>> use only one package so no preemption could allowed >>>> >>>> Signed-off-by: Monk Liu <Monk.Liu at amd.com> >>>> Signed-off-by: Emily Deng <Emily.Deng at amd.com> >>>> --- >>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 ++ >>>> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 +++++++++- >>>> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 18 +++++++++++++----- >>>> 4 files changed, 25 insertions(+), 6 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>> index a7e2229..986659f 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>> @@ -1790,6 +1790,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) >>>> #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) >>>> #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), >>>> (v)) >>>> #define amdgpu_ring_emit_reg_wait(r, d, v, m) >>>> (r)->funcs->emit_reg_wait((r), (d), (v), (m)) >>>> +#define amdgpu_ring_emit_reg_wait1(r, d0, d1, v, m) >>>> (r)->funcs->emit_reg_wait1((r), (d0), (d1), (v), (m)) >>>> #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) >>>> #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) >>>> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >>>> index 1d0d250..d85df5d 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >>>> @@ -152,6 +152,8 @@ struct amdgpu_ring_funcs { >>>> void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, >>>> uint32_t val); >>>> void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg, >>>> uint32_t val, uint32_t mask); >>>> + void (*emit_reg_wait1)(struct amdgpu_ring *ring, uint32_t reg0, >>>> + uint32_t reg1, uint32_t val, uint32_t >>>> mask); >>> >>> This is pretty ugly. We don't need a new callback for this >>> workaround. It will just confuse things for other IPs. Just call >>> gfx_v9_0_wait_reg_mem() directly if you need it in >>> gmc_v9_0_emit_flush_gpu_tlb(). >> >> Nevermind, I misread the patch and thought the change was in the gfx9 >> module. In this case, I'd rather split this into three patches. >> >> 1. add the new ring callback with a better name. E.g., >> emit_reg_wait_oneshot() > > > Maybe "emit_reg_write_wait()", but I still think that is an absolutely worse > idea. Well apparently the issue is that world switches can't happen between the req and the ack, so this would consolidate the operation to one packet which should avoid that issue. > >> 2. add the new callback implementation to gfx9 and gfx8 (I think gfx8 >> will need this as well since we support sr-iov there too) > > > gfx8 doesn't have the hardware bug which seems to make this necessary, not > does it have the same VMHUB design as gfx9. Oh, right, in this case it's the req/ack engines which were new for soc15. We may want the same fix for sdma4 though. > >> 3. in the gmc8 and 9 code tlb_flush callbacks, check if the ring >> supports the new callback and if so, use that, otherwise, fall back to >> the existing code. > > > Actually I would rather say we provide a stub in amdgpu_ring.c which just > uses the separate write and wait callbacks. yeah, that may be cleaner. Alex > > Christian. > > >> >> >>> Alex >>> >>>> void (*emit_tmz)(struct amdgpu_ring *ring, bool start); >>>> /* priority functions */ >>>> void (*set_priority) (struct amdgpu_ring *ring, >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >>>> index 1ae3de1..509c9d2 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >>>> @@ -4078,6 +4078,13 @@ static void gfx_v9_0_ring_emit_reg_wait(struct >>>> amdgpu_ring *ring, uint32_t reg, >>>> gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); >>>> } >>>> >>>> +static void gfx_v9_0_ring_emit_reg_wait_compute(struct amdgpu_ring >>>> *ring, >>>> + uint32_t reg0, uint32_t reg1, >>>> + uint32_t val, uint32_t mask) >>>> +{ >>>> + gfx_v9_0_wait_reg_mem(ring, 0, 0, 1, reg0, reg1, val, mask, >>>> 0x20); >>>> +} >>>> + >>>> static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device >>>> *adev, >>>> enum >>>> amdgpu_interrupt_state state) >>>> { >>>> @@ -4415,7 +4422,7 @@ static const struct amdgpu_ring_funcs >>>> gfx_v9_0_ring_funcs_compute = { >>>> 7 + /* gfx_v9_0_ring_emit_hdp_flush */ >>>> 5 + /* hdp invalidate */ >>>> 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ >>>> - SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + >>>> + (SOC15_FLUSH_GPU_TLB_NUM_WREG - 1) * 5 + >>>> SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + >>>> 2 + /* gfx_v9_0_ring_emit_vm_flush */ >>>> 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user >>>> fence, vm fence */ >>>> @@ -4433,6 +4440,7 @@ static const struct amdgpu_ring_funcs >>>> gfx_v9_0_ring_funcs_compute = { >>>> .set_priority = gfx_v9_0_ring_set_priority_compute, >>>> .emit_wreg = gfx_v9_0_ring_emit_wreg, >>>> .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, >>>> + .emit_reg_wait1 = gfx_v9_0_ring_emit_reg_wait_compute, >>>> }; >>>> >>>> static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >>>> index e687363..968447d 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >>>> @@ -385,11 +385,19 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct >>>> amdgpu_ring *ring, >>>> amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * >>>> vmid), >>>> upper_32_bits(pd_addr)); >>>> >>>> - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); >>>> - >>>> - /* wait for the invalidate to complete */ >>>> - amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, >>>> - 1 << vmid, 1 << vmid); >>>> + /* The world switch cannot be allowed to occur while >>>> + some invalidation controller code is waiting for an ack. >>>> + To workaround the hardware restriction, replace the original >>>> + two command with one command for compute ring */ >>>> + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && >>>> amdgpu_sriov_vf(adev)) { >> >> This will break gfx 8 with sr-iov. >> >> Alex >> >>>> + amdgpu_ring_emit_reg_wait1(ring, hub->vm_inv_eng0_req + >>>> eng, >>>> + hub->vm_inv_eng0_ack + eng, req, 1 << >>>> vmid); >>>> + } else { >>>> + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, >>>> req); >>>> + /* wait for the invalidate to complete */ >>>> + amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + >>>> eng, >>>> + 1 << vmid, 1 << vmid); >>>> + } >>>> >>>> return pd_addr; >>>> } >>>> -- >>>> 2.7.4 >>>> >>>> _______________________________________________ >>>> amd-gfx mailing list >>>> amd-gfx at lists.freedesktop.org >>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx >> >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx at lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx > >