On Tue, Mar 27, 2018 at 11:15 AM, Alex Deucher <alexdeucher at gmail.com> wrote: > On Tue, Mar 27, 2018 at 1:58 AM, Emily Deng <Emily.Deng at amd.com> wrote: >> issue: >> the vmflush in KCQ could be preempted (not like GFX ring >> which doesn't allow preemption in ring buffer) and this lead >> to vm flush fail when there is a world switch during >> the vm flush procedure (between write invalidate request >> and query invalidate ack) >> >> fix: >> separate vm flush for gfx and compute ring, and use >> the new format command in compute's vm flush which >> use only one package so no preemption could allowed >> >> Signed-off-by: Monk Liu <Monk.Liu at amd.com> >> Signed-off-by: Emily Deng <Emily.Deng at amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + >> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 ++ >> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 +++++++++- >> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 18 +++++++++++++----- >> 4 files changed, 25 insertions(+), 6 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index a7e2229..986659f 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -1790,6 +1790,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) >> #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) >> #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) >> #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) >> +#define amdgpu_ring_emit_reg_wait1(r, d0, d1, v, m) (r)->funcs->emit_reg_wait1((r), (d0), (d1), (v), (m)) >> #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) >> #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) >> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >> index 1d0d250..d85df5d 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >> @@ -152,6 +152,8 @@ struct amdgpu_ring_funcs { >> void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); >> void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg, >> uint32_t val, uint32_t mask); >> + void (*emit_reg_wait1)(struct amdgpu_ring *ring, uint32_t reg0, >> + uint32_t reg1, uint32_t val, uint32_t mask); > > This is pretty ugly. We don't need a new callback for this > workaround. It will just confuse things for other IPs. Just call > gfx_v9_0_wait_reg_mem() directly if you need it in > gmc_v9_0_emit_flush_gpu_tlb(). > > Alex > >> void (*emit_tmz)(struct amdgpu_ring *ring, bool start); >> /* priority functions */ >> void (*set_priority) (struct amdgpu_ring *ring, >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> index 1ae3de1..509c9d2 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> @@ -4078,6 +4078,13 @@ static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, >> gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); >> } >> >> +static void gfx_v9_0_ring_emit_reg_wait_compute(struct amdgpu_ring *ring, >> + uint32_t reg0, uint32_t reg1, >> + uint32_t val, uint32_t mask) >> +{ >> + gfx_v9_0_wait_reg_mem(ring, 0, 0, 1, reg0, reg1, val, mask, 0x20); >> +} >> + >> static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, >> enum amdgpu_interrupt_state state) >> { >> @@ -4415,7 +4422,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { >> 7 + /* gfx_v9_0_ring_emit_hdp_flush */ >> 5 + /* hdp invalidate */ >> 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ >> - SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + >> + (SOC15_FLUSH_GPU_TLB_NUM_WREG - 1) * 5 + >> SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + >> 2 + /* gfx_v9_0_ring_emit_vm_flush */ >> 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ >> @@ -4433,6 +4440,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { >> .set_priority = gfx_v9_0_ring_set_priority_compute, >> .emit_wreg = gfx_v9_0_ring_emit_wreg, >> .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, >> + .emit_reg_wait1 = gfx_v9_0_ring_emit_reg_wait_compute, >> }; >> >> static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> index e687363..968447d 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> @@ -385,11 +385,19 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, >> amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), >> upper_32_bits(pd_addr)); >> >> - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); >> - >> - /* wait for the invalidate to complete */ >> - amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, >> - 1 << vmid, 1 << vmid); >> + /* The world switch cannot be allowed to occur while >> + some invalidation controller code is waiting for an ack. >> + To workaround the hardware restriction, replace the original >> + two command with one command for compute ring */ >> + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && amdgpu_sriov_vf(adev)) { >> + amdgpu_ring_emit_reg_wait1(ring, hub->vm_inv_eng0_req + eng, >> + hub->vm_inv_eng0_ack + eng, req, 1 << vmid); Also, it looks like this should be safe to do for all cases. It basically just moves from a two packet solution to a one packet solution. Alex >> + } else { >> + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); >> + /* wait for the invalidate to complete */ >> + amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, >> + 1 << vmid, 1 << vmid); >> + } >> >> return pd_addr; >> } >> -- >> 2.7.4 >> >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx at lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx