[PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb

Emily.Deng@xxxxxxx (Deng, Emily) · Fri, 17 Aug 2018 02:37:06 +0000



>-----Original Message-----
>From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Deng,
>Emily
>Sent: Friday, August 17, 2018 10:19 AM
>To: Koenig, Christian <Christian.Koenig at amd.com>; amd-
>gfx at lists.freedesktop.org
>Subject: RE: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb
>
>>-----Original Message-----
>>From: Christian KÃ¶nig <ckoenig.leichtzumerken at gmail.com>
>>Sent: Thursday, August 16, 2018 9:24 PM
>>To: Deng, Emily <Emily.Deng at amd.com>; amd-gfx at lists.freedesktop.org
>>Subject: Re: [PATCH 2/2] drm/amdgpu: use kiq to do invalidate tlb
>>
>>Am 16.08.2018 um 15:05 schrieb Emily Deng:
>>> To avoid the tlb flush not interrupted by world switch, use kiq and
>>> one command to do tlb invalidate.
>>>
>>> Signed-off-by: Emily Deng <Emily.Deng at amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  4 +++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  3 --
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 60
>>++++++++++++++++++++++++++++++++
>>>   3 files changed, 64 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 6265b88..19ef7711 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -212,6 +212,10 @@ enum amdgpu_kiq_irq {
>>>   	AMDGPU_CP_KIQ_IRQ_LAST
>>>   };
>>>
>>> +#define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */
>>> +#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
>>> +#define MAX_KIQ_REG_TRY 20
>>> +
>>>   int amdgpu_device_ip_set_clockgating_state(void *dev,
>>>   					   enum amd_ip_block_type block_type,
>>>   					   enum amd_clockgating_state state);
>>diff --git
>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>> index 21adb1b6..3885636 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>> @@ -22,9 +22,6 @@
>>>    */
>>>
>>>   #include "amdgpu.h"
>>> -#define MAX_KIQ_REG_WAIT	5000 /* in usecs, 5ms */
>>> -#define MAX_KIQ_REG_BAILOUT_INTERVAL	5 /* in msecs, 5ms */
>>> -#define MAX_KIQ_REG_TRY 20
>>>
>>>   uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
>>>   {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> index ed467de..6c164bd 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> @@ -311,6 +311,58 @@ static uint32_t
>>gmc_v9_0_get_invalidate_req(unsigned int vmid)
>>>   	return req;
>>>   }
>>>
>>> +signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
>>> +						  uint32_t reg0, uint32_t reg1,
>>> +						  uint32_t ref, uint32_t mask) {
>>> +	signed long r, cnt = 0;
>>> +	unsigned long flags;
>>> +	uint32_t seq;
>>> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>> +	struct amdgpu_ring *ring = &kiq->ring;
>>> +
>>> +	if (!ring->ready)
>>> +		return -EINVAL;
>>> +
>>> +	spin_lock_irqsave(&kiq->ring_lock, flags);
>>> +
>>> +	amdgpu_ring_alloc(ring, 32);
>>> +	amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
>>> +					    ref, mask);
>>> +	amdgpu_fence_emit_polling(ring, &seq);
>>> +	amdgpu_ring_commit(ring);
>>> +	spin_unlock_irqrestore(&kiq->ring_lock, flags);
>>> +
>>> +	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
>>> +
>>> +	/* don't wait anymore for gpu reset case because this way may
>>> +	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
>>> +	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
>>> +	 * never return if we keep waiting in virt_kiq_rreg, which cause
>>> +	 * gpu_recover() hang there.
>>> +	 *
>>> +	 * also don't wait anymore for IRQ context
>>> +	 * */
>>> +	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
>>> +		goto failed_kiq;
>>> +
>>> +	might_sleep();
>>> +
>>> +	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
>>> +		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
>>> +		r = amdgpu_fence_wait_polling(ring, seq,
>>MAX_KIQ_REG_WAIT);
>>> +	}
>>> +
>>> +	if (cnt > MAX_KIQ_REG_TRY)
>>> +		goto failed_kiq;
>>> +
>>> +	return 0;
>>> +
>>> +failed_kiq:
>>> +	pr_err("failed to invalidate tlb with kiq\n");
>>> +	return r;
>>> +}
>>> +
>>>   /*
>>>    * GART
>>>    * VMID 0 is the physical GPU addresses as used by the kernel.
>>> @@ -332,6 +384,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct
>>amdgpu_device *adev,
>>>   	/* Use register 17 for GART */
>>>   	const unsigned eng = 17;
>>>   	unsigned i, j;
>>> +	int r;
>>>
>>>   	spin_lock(&adev->gmc.invalidate_lock);
>>>
>>> @@ -339,6 +392,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct
>>amdgpu_device *adev,
>>>   		struct amdgpu_vmhub *hub = &adev->vmhub[i];
>>>   		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
>>>
>>> +		spin_unlock(&adev->gmc.invalidate_lock);
>>
>>Well just remove the lock altogether. Taking it and dropping it again
>>immediately doesn't do anything useful.
>>
>>> +		r = amdgpu_kiq_reg_write_reg_wait(adev, hub-
>>>vm_inv_eng0_req + eng,
>>> +			hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
>>> +		spin_lock(&adev->gmc.invalidate_lock);
>>> +		if (!r)
>>> +			continue;
>>> +
>>>   		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
>>>
>>>   		/* Busy wait for ACK.*/
>>Since you now use the KIQ to wait for the TLB flush I think we can now
>>remove the MMIO implementation.
>Ok. If remove the MMIO implementation, then we don't need adev-
>>gmc.invalidate_lock.
Sorry, just forget one thing, the MMIO path couldn't be removed, as the kiq only 
could be used when the kiq ring is ready for using, so when the kiq is not ready,
 need fallback to the MMIO path.
>>
>>Regards,
>>Christian.
>_______________________________________________
>amd-gfx mailing list
>amd-gfx at lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/amd-gfx