>-----Original Message----- >From: Christian König <ckoenig.leichtzumerken at gmail.com> >Sent: Thursday, August 16, 2018 9:24 PM >To: Deng, Emily <Emily.Deng at amd.com>; amd-gfx at lists.freedesktop.org >Subject: Re: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb > >Am 16.08.2018 um 15:05 schrieb Emily Deng: >> To avoid the tlb flush not interrupted by world switch, use kiq and >> one command to do tlb invalidate. >> >> Signed-off-by: Emily Deng <Emily.Deng at amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 -- >> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 60 >++++++++++++++++++++++++++++++++ >> 3 files changed, 64 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index 6265b88..19ef7711 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -212,6 +212,10 @@ enum amdgpu_kiq_irq { >> AMDGPU_CP_KIQ_IRQ_LAST >> }; >> >> +#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ >> +#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ >> +#define MAX_KIQ_REG_TRY 20 >> + >> int amdgpu_device_ip_set_clockgating_state(void *dev, >> enum amd_ip_block_type block_type, >> enum amd_clockgating_state state); >diff --git >> a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> index 21adb1b6..3885636 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c >> @@ -22,9 +22,6 @@ >> */ >> >> #include "amdgpu.h" >> -#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ >> -#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ >> -#define MAX_KIQ_REG_TRY 20 >> >> uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) >> { >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> index ed467de..6c164bd 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> @@ -311,6 +311,58 @@ static uint32_t >gmc_v9_0_get_invalidate_req(unsigned int vmid) >> return req; >> } >> >> +signed long amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev, >> + uint32_t reg0, uint32_t reg1, >> + uint32_t ref, uint32_t mask) >> +{ >> + signed long r, cnt = 0; >> + unsigned long flags; >> + uint32_t seq; >> + struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> + struct amdgpu_ring *ring = &kiq->ring; >> + >> + if (!ring->ready) >> + return -EINVAL; >> + >> + spin_lock_irqsave(&kiq->ring_lock, flags); >> + >> + amdgpu_ring_alloc(ring, 32); >> + amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, >> + ref, mask); >> + amdgpu_fence_emit_polling(ring, &seq); >> + amdgpu_ring_commit(ring); >> + spin_unlock_irqrestore(&kiq->ring_lock, flags); >> + >> + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); >> + >> + /* don't wait anymore for gpu reset case because this way may >> + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg >> + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will >> + * never return if we keep waiting in virt_kiq_rreg, which cause >> + * gpu_recover() hang there. >> + * >> + * also don't wait anymore for IRQ context >> + * */ >> + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) >> + goto failed_kiq; >> + >> + might_sleep(); >> + >> + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { >> + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); >> + r = amdgpu_fence_wait_polling(ring, seq, >MAX_KIQ_REG_WAIT); >> + } >> + >> + if (cnt > MAX_KIQ_REG_TRY) >> + goto failed_kiq; >> + >> + return 0; >> + >> +failed_kiq: >> + pr_err("failed to invalidate tlb with kiq\n"); >> + return r; >> +} >> + >> /* >> * GART >> * VMID 0 is the physical GPU addresses as used by the kernel. >> @@ -332,6 +384,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct >amdgpu_device *adev, >> /* Use register 17 for GART */ >> const unsigned eng = 17; >> unsigned i, j; >> + int r; >> >> spin_lock(&adev->gmc.invalidate_lock); >> >> @@ -339,6 +392,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct >amdgpu_device *adev, >> struct amdgpu_vmhub *hub = &adev->vmhub[i]; >> u32 tmp = gmc_v9_0_get_invalidate_req(vmid); >> >> + spin_unlock(&adev->gmc.invalidate_lock); > >Well just remove the lock altogether. Taking it and dropping it again >immediately doesn't do anything useful. > >> + r = amdgpu_kiq_reg_write_reg_wait(adev, hub- >>vm_inv_eng0_req + eng, >> + hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid); >> + spin_lock(&adev->gmc.invalidate_lock); >> + if (!r) >> + continue; >> + >> WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); >> >> /* Busy wait for ACK.*/ >Since you now use the KIQ to wait for the TLB flush I think we can now remove >the MMIO implementation. Ok. If remove the MMIO implementation, then we don't need adev->gmc.invalidate_lock. > >Regards, >Christian.