RE: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid

"Sierra Guiza, Alejandro (Alex)" <Alex.Sierra@xxxxxxx> · Tue, 14 Jan 2020 03:21:46 +0000

[AMD Official Use Only - Internal Distribution Only]

Yes, both gfx_v10_0_ring_emit_fence and gfx_v9_0_ring_emit_fence have 8 ring writes. Plus 2 of the flush.

Regards,
Alejandro S.
-----Original Message-----
From: Kuehling, Felix <Felix.Kuehling@xxxxxxx> 
Sent: Monday, January 13, 2020 6:55 PM
To: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Subject: Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid

I noticed that the invalidate_tlbs_size in patch 3 was also wrong. That should only be 2 dwords, not 12. The code here should do

     amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);

I think 12 was too much in the original code. Flush + fence should only be 10 dwords, unless I misses something or counted wrong.

Regards,
   Felix

On 2020-01-13 7:48 p.m., Sierra Guiza, Alejandro (Alex) wrote:
> [AMD Official Use Only - Internal Distribution Only]
>
> I just pushed the series, but I'll go ahead and create a new patch for this.
> The .invalidate_tlbs_size, is it based on dword size? Currently is 12, should I need to drop it to 8 then?
>
> -----Original Message-----
> From: Kuehling, Felix <Felix.Kuehling@xxxxxxx>
> Sent: Monday, January 13, 2020 6:34 PM
> To: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@xxxxxxx>; 
> amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Subject: Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via 
> pasid
>
> Sorry, I already said, Reviewed-by, but realized there was one more problem. If you haven't submitted yet, please fix that first. Otherwise, please make it a follow-up patch. See inline ...
>
> On 2020-01-13 3:26 p.m., Alex Sierra wrote:
>> This can be used directly from amdgpu and amdkfd to invalidate TLB 
>> through pasid.
>> It supports gmc v7, v8, v9 and v10.
>>
>> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
>> Signed-off-by: Alex Sierra <alex.sierra@xxxxxxx>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
>>    drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 74 ++++++++++++++++++++++++
>>    drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 +++++++++++
>>    drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 +++++++++++
>>    drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 76 +++++++++++++++++++++++++
>>    5 files changed, 223 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> index c91dd602d5f1..d3c27a3c43f6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
>>    	/* flush the vm tlb via mmio */
>>    	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
>>    				uint32_t vmhub, uint32_t flush_type);
>> +	/* flush the vm tlb via pasid */
>> +	int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
>> +					uint32_t flush_type, bool all_hub);
>>    	/* flush the vm tlb via ring */
>>    	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
>>    				       uint64_t pd_addr);
>> @@ -216,6 +219,9 @@ struct amdgpu_gmc {
>>    };
>>    
>>    #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) 
>> ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub),
>> (type)))
>> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
>> +	((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
>> +	((adev), (pasid), (type), (allhub)))
>>    #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
>>    #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>>    #define amdgpu_gmc_map_mtype(adev, flags)
>> (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index 5ad89bb6f3ba..8afd05834714 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -30,6 +30,8 @@
>>    #include "hdp/hdp_5_0_0_sh_mask.h"
>>    #include "gc/gc_10_1_0_sh_mask.h"
>>    #include "mmhub/mmhub_2_0_0_sh_mask.h"
>> +#include "athub/athub_2_0_0_sh_mask.h"
>> +#include "athub/athub_2_0_0_offset.h"
>>    #include "dcn/dcn_2_0_0_offset.h"
>>    #include "dcn/dcn_2_0_0_sh_mask.h"
>>    #include "oss/osssys_5_0_0_offset.h"
>> @@ -37,6 +39,7 @@
>>    #include "navi10_enum.h"
>>    
>>    #include "soc15.h"
>> +#include "soc15d.h"
>>    #include "soc15_common.h"
>>    
>>    #include "nbio_v2_3.h"
>> @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>>    		(!amdgpu_sriov_vf(adev)));
>>    }
>>    
>> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
>> +					struct amdgpu_device *adev,
>> +					uint8_t vmid, uint16_t *p_pasid) {
>> +	uint32_t value;
>> +
>> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
>> +		     + vmid);
>> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
>> +
>> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>> +}
>> +
>>    /*
>>     * GART
>>     * VMID 0 is the physical GPU addresses as used by the kernel.
>> @@ -380,6 +396,63 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>    	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>>    }
>>    
>> +/**
>> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @pasid: pasid to be flush
>> + *
>> + * Flush the TLB for the requested pasid.
>> + */
>> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>> +					uint16_t pasid, uint32_t flush_type,
>> +					bool all_hub)
>> +{
>> +	int vmid, i;
>> +	signed long r;
>> +	uint32_t seq;
>> +	uint16_t queried_pasid;
>> +	bool ret;
>> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>> +
>> +	if (amdgpu_emu_mode == 0 && ring->sched.ready) {
>> +		spin_lock(&adev->gfx.kiq.ring_lock);
>> +		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
> You need to allocate more space here for amdgpu_fence_emit_polling.
> Looks like gfx_v10_0_ring_emit_fence needs 8 dwords.
>
>
>> +		kiq->pmf->kiq_invalidate_tlbs(ring,
>> +					pasid, flush_type, all_hub);
>> +		amdgpu_fence_emit_polling(ring, &seq);
>> +		amdgpu_ring_commit(ring);
>> +		spin_unlock(&adev->gfx.kiq.ring_lock);
>> +		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>> +		if (r < 1) {
>> +			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>> +			return -ETIME;
>> +		}
>> +
>> +		return 0;
>> +	}
>> +
>> +	for (vmid = 1; vmid < 16; vmid++) {
>> +
>> +		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
>> +				&queried_pasid);
>> +		if (ret	&& queried_pasid == pasid) {
>> +			if (all_hub) {
>> +				for (i = 0; i < adev->num_vmhubs; i++)
>> +					gmc_v10_0_flush_gpu_tlb(adev, vmid,
>> +							i, 0);
>> +			} else {
>> +				gmc_v10_0_flush_gpu_tlb(adev, vmid,
>> +						AMDGPU_GFXHUB_0, 0);
>> +			}
>> +			break;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>    static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>>    					     unsigned vmid, uint64_t pd_addr)
>>    {
>> @@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct 
>> amdgpu_device *adev,
>>    
>>    static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
>>    	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
>> +	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
>>    	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
>>    	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
>>    	.map_mtype = gmc_v10_0_map_mtype, diff --git 
>> a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> index f08e5330642d..19d5b133e1d7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
>>    	return 0;
>>    }
>>    
>> +/**
>> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @pasid: pasid to be flush
>> + *
>> + * Flush the TLB for the requested pasid.
>> + */
>> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>> +					uint16_t pasid, uint32_t flush_type,
>> +					bool all_hub)
>> +{
>> +	int vmid;
>> +	unsigned int tmp;
>> +
>> +	if (adev->in_gpu_reset)
>> +		return -EIO;
>> +
>> +	for (vmid = 1; vmid < 16; vmid++) {
>> +
>> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
>> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
>> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>> +			RREG32(mmVM_INVALIDATE_RESPONSE);
>> +			break;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>    /*
>>     * GART
>>     * VMID 0 is the physical GPU addresses as used by the kernel.
>> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs 
>> gmc_v7_0_ip_funcs = {
>>    
>>    static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
>>    	.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
>> +	.flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
>>    	.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
>>    	.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
>>    	.set_prt = gmc_v7_0_set_prt,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> index 6d96d40fbcb8..27d83204fa2b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
>>    	return 0;
>>    }
>>    
>> +/**
>> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @pasid: pasid to be flush
>> + *
>> + * Flush the TLB for the requested pasid.
>> + */
>> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>> +					uint16_t pasid, uint32_t flush_type,
>> +					bool all_hub)
>> +{
>> +	int vmid;
>> +	unsigned int tmp;
>> +
>> +	if (adev->in_gpu_reset)
>> +		return -EIO;
>> +
>> +	for (vmid = 1; vmid < 16; vmid++) {
>> +
>> +		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
>> +		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
>> +			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
>> +			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
>> +			RREG32(mmVM_INVALIDATE_RESPONSE);
>> +			break;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +
>> +}
>> +
>>    /*
>>     * GART
>>     * VMID 0 is the physical GPU addresses as used by the kernel.
>> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs 
>> gmc_v8_0_ip_funcs = {
>>    
>>    static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
>>    	.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
>> +	.flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
>>    	.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
>>    	.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
>>    	.set_prt = gmc_v8_0_set_prt,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index b83c8d745f42..40a496804356 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -38,10 +38,12 @@
>>    #include "dce/dce_12_0_sh_mask.h"
>>    #include "vega10_enum.h"
>>    #include "mmhub/mmhub_1_0_offset.h"
>> +#include "athub/athub_1_0_sh_mask.h"
>>    #include "athub/athub_1_0_offset.h"
>>    #include "oss/osssys_4_0_offset.h"
>>    
>>    #include "soc15.h"
>> +#include "soc15d.h"
>>    #include "soc15_common.h"
>>    #include "umc/umc_6_0_sh_mask.h"
>>    
>> @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
>>    		   adev->pdev->device == 0x15d8)));
>>    }
>>    
>> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
>> +					uint8_t vmid, uint16_t *p_pasid) {
>> +	uint32_t value;
>> +
>> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
>> +		     + vmid);
>> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
>> +
>> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>> +}
>> +
>>    /*
>>     * GART
>>     * VMID 0 is the physical GPU addresses as used by the kernel.
>> @@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>    	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>>    }
>>    
>> +/**
>> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @pasid: pasid to be flush
>> + *
>> + * Flush the TLB for the requested pasid.
>> + */
>> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>> +					uint16_t pasid, uint32_t flush_type,
>> +					bool all_hub)
>> +{
>> +	int vmid, i;
>> +	signed long r;
>> +	uint32_t seq;
>> +	uint16_t queried_pasid;
>> +	bool ret;
>> +	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>> +
>> +	if (adev->in_gpu_reset)
>> +		return -EIO;
>> +
>> +	if (ring->sched.ready) {
>> +		spin_lock(&adev->gfx.kiq.ring_lock);
>> +		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
> Same as above.
>
> Regards,
>     Felix
>
>> +		kiq->pmf->kiq_invalidate_tlbs(ring,
>> +					pasid, flush_type, all_hub);
>> +		amdgpu_fence_emit_polling(ring, &seq);
>> +		amdgpu_ring_commit(ring);
>> +		spin_unlock(&adev->gfx.kiq.ring_lock);
>> +		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>> +		if (r < 1) {
>> +			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>> +			return -ETIME;
>> +		}
>> +
>> +		return 0;
>> +	}
>> +
>> +	for (vmid = 1; vmid < 16; vmid++) {
>> +
>> +		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
>> +				&queried_pasid);
>> +		if (ret && queried_pasid == pasid) {
>> +			if (all_hub) {
>> +				for (i = 0; i < adev->num_vmhubs; i++)
>> +					gmc_v9_0_flush_gpu_tlb(adev, vmid,
>> +							i, 0);
>> +			} else {
>> +				gmc_v9_0_flush_gpu_tlb(adev, vmid,
>> +						AMDGPU_GFXHUB_0, 0);
>> +			}
>> +			break;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +
>> +}
>> +
>>    static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>>    					    unsigned vmid, uint64_t pd_addr)
>>    {
>> @@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct 
>> amdgpu_device *adev,
>>    
>>    static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
>>    	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
>> +	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
>>    	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
>>    	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
>>    	.map_mtype = gmc_v9_0_map_mtype,
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx