[AMD Official Use Only - Internal Distribution Only] Yes, both gfx_v10_0_ring_emit_fence and gfx_v9_0_ring_emit_fence have 8 ring writes. Plus 2 of the flush. Regards, Alejandro S. -----Original Message----- From: Kuehling, Felix <Felix.Kuehling@xxxxxxx> Sent: Monday, January 13, 2020 6:55 PM To: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx Subject: Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via pasid I noticed that the invalidate_tlbs_size in patch 3 was also wrong. That should only be 2 dwords, not 12. The code here should do amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); I think 12 was too much in the original code. Flush + fence should only be 10 dwords, unless I misses something or counted wrong. Regards, Felix On 2020-01-13 7:48 p.m., Sierra Guiza, Alejandro (Alex) wrote: > [AMD Official Use Only - Internal Distribution Only] > > I just pushed the series, but I'll go ahead and create a new patch for this. > The .invalidate_tlbs_size, is it based on dword size? Currently is 12, should I need to drop it to 8 then? > > -----Original Message----- > From: Kuehling, Felix <Felix.Kuehling@xxxxxxx> > Sent: Monday, January 13, 2020 6:34 PM > To: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@xxxxxxx>; > amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Subject: Re: [PATCH 5/7] drm/amdgpu: export function to flush TLB via > pasid > > Sorry, I already said, Reviewed-by, but realized there was one more problem. If you haven't submitted yet, please fix that first. Otherwise, please make it a follow-up patch. See inline ... > > On 2020-01-13 3:26 p.m., Alex Sierra wrote: >> This can be used directly from amdgpu and amdkfd to invalidate TLB >> through pasid. >> It supports gmc v7, v8, v9 and v10. >> >> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490 >> Signed-off-by: Alex Sierra <alex.sierra@xxxxxxx> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++ >> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 74 ++++++++++++++++++++++++ >> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 33 +++++++++++ >> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 34 +++++++++++ >> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 76 +++++++++++++++++++++++++ >> 5 files changed, 223 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> index c91dd602d5f1..d3c27a3c43f6 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h >> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs { >> /* flush the vm tlb via mmio */ >> void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, >> uint32_t vmhub, uint32_t flush_type); >> + /* flush the vm tlb via pasid */ >> + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, >> + uint32_t flush_type, bool all_hub); >> /* flush the vm tlb via ring */ >> uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, >> uint64_t pd_addr); >> @@ -216,6 +219,9 @@ struct amdgpu_gmc { >> }; >> >> #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) >> ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), >> (type))) >> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ >> + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ >> + ((adev), (pasid), (type), (allhub))) >> #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) >> #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) >> #define amdgpu_gmc_map_mtype(adev, flags) >> (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> index 5ad89bb6f3ba..8afd05834714 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> @@ -30,6 +30,8 @@ >> #include "hdp/hdp_5_0_0_sh_mask.h" >> #include "gc/gc_10_1_0_sh_mask.h" >> #include "mmhub/mmhub_2_0_0_sh_mask.h" >> +#include "athub/athub_2_0_0_sh_mask.h" >> +#include "athub/athub_2_0_0_offset.h" >> #include "dcn/dcn_2_0_0_offset.h" >> #include "dcn/dcn_2_0_0_sh_mask.h" >> #include "oss/osssys_5_0_0_offset.h" >> @@ -37,6 +39,7 @@ >> #include "navi10_enum.h" >> >> #include "soc15.h" >> +#include "soc15d.h" >> #include "soc15_common.h" >> >> #include "nbio_v2_3.h" >> @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, >> (!amdgpu_sriov_vf(adev))); >> } >> >> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( >> + struct amdgpu_device *adev, >> + uint8_t vmid, uint16_t *p_pasid) { >> + uint32_t value; >> + >> + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) >> + + vmid); >> + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; >> + >> + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); >> +} >> + >> /* >> * GART >> * VMID 0 is the physical GPU addresses as used by the kernel. >> @@ -380,6 +396,63 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, >> DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); >> } >> >> +/** >> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid >> + * >> + * @adev: amdgpu_device pointer >> + * @pasid: pasid to be flush >> + * >> + * Flush the TLB for the requested pasid. >> + */ >> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, >> + uint16_t pasid, uint32_t flush_type, >> + bool all_hub) >> +{ >> + int vmid, i; >> + signed long r; >> + uint32_t seq; >> + uint16_t queried_pasid; >> + bool ret; >> + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; >> + struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> + >> + if (amdgpu_emu_mode == 0 && ring->sched.ready) { >> + spin_lock(&adev->gfx.kiq.ring_lock); >> + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); > You need to allocate more space here for amdgpu_fence_emit_polling. > Looks like gfx_v10_0_ring_emit_fence needs 8 dwords. > > >> + kiq->pmf->kiq_invalidate_tlbs(ring, >> + pasid, flush_type, all_hub); >> + amdgpu_fence_emit_polling(ring, &seq); >> + amdgpu_ring_commit(ring); >> + spin_unlock(&adev->gfx.kiq.ring_lock); >> + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); >> + if (r < 1) { >> + DRM_ERROR("wait for kiq fence error: %ld.\n", r); >> + return -ETIME; >> + } >> + >> + return 0; >> + } >> + >> + for (vmid = 1; vmid < 16; vmid++) { >> + >> + ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, >> + &queried_pasid); >> + if (ret && queried_pasid == pasid) { >> + if (all_hub) { >> + for (i = 0; i < adev->num_vmhubs; i++) >> + gmc_v10_0_flush_gpu_tlb(adev, vmid, >> + i, 0); >> + } else { >> + gmc_v10_0_flush_gpu_tlb(adev, vmid, >> + AMDGPU_GFXHUB_0, 0); >> + } >> + break; >> + } >> + } >> + >> + return 0; >> +} >> + >> static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, >> unsigned vmid, uint64_t pd_addr) >> { >> @@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct >> amdgpu_device *adev, >> >> static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { >> .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb, >> + .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid, >> .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, >> .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, >> .map_mtype = gmc_v10_0_map_mtype, diff --git >> a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >> index f08e5330642d..19d5b133e1d7 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c >> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) >> return 0; >> } >> >> +/** >> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid >> + * >> + * @adev: amdgpu_device pointer >> + * @pasid: pasid to be flush >> + * >> + * Flush the TLB for the requested pasid. >> + */ >> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, >> + uint16_t pasid, uint32_t flush_type, >> + bool all_hub) >> +{ >> + int vmid; >> + unsigned int tmp; >> + >> + if (adev->in_gpu_reset) >> + return -EIO; >> + >> + for (vmid = 1; vmid < 16; vmid++) { >> + >> + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); >> + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && >> + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { >> + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); >> + RREG32(mmVM_INVALIDATE_RESPONSE); >> + break; >> + } >> + } >> + >> + return 0; >> +} >> + >> /* >> * GART >> * VMID 0 is the physical GPU addresses as used by the kernel. >> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs >> gmc_v7_0_ip_funcs = { >> >> static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { >> .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb, >> + .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid, >> .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, >> .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, >> .set_prt = gmc_v7_0_set_prt, >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> index 6d96d40fbcb8..27d83204fa2b 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c >> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) >> return 0; >> } >> >> +/** >> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid >> + * >> + * @adev: amdgpu_device pointer >> + * @pasid: pasid to be flush >> + * >> + * Flush the TLB for the requested pasid. >> + */ >> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, >> + uint16_t pasid, uint32_t flush_type, >> + bool all_hub) >> +{ >> + int vmid; >> + unsigned int tmp; >> + >> + if (adev->in_gpu_reset) >> + return -EIO; >> + >> + for (vmid = 1; vmid < 16; vmid++) { >> + >> + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); >> + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && >> + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { >> + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); >> + RREG32(mmVM_INVALIDATE_RESPONSE); >> + break; >> + } >> + } >> + >> + return 0; >> + >> +} >> + >> /* >> * GART >> * VMID 0 is the physical GPU addresses as used by the kernel. >> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs >> gmc_v8_0_ip_funcs = { >> >> static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { >> .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb, >> + .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid, >> .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, >> .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, >> .set_prt = gmc_v8_0_set_prt, >> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> index b83c8d745f42..40a496804356 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c >> @@ -38,10 +38,12 @@ >> #include "dce/dce_12_0_sh_mask.h" >> #include "vega10_enum.h" >> #include "mmhub/mmhub_1_0_offset.h" >> +#include "athub/athub_1_0_sh_mask.h" >> #include "athub/athub_1_0_offset.h" >> #include "oss/osssys_4_0_offset.h" >> >> #include "soc15.h" >> +#include "soc15d.h" >> #include "soc15_common.h" >> #include "umc/umc_6_0_sh_mask.h" >> >> @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, >> adev->pdev->device == 0x15d8))); >> } >> >> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, >> + uint8_t vmid, uint16_t *p_pasid) { >> + uint32_t value; >> + >> + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) >> + + vmid); >> + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; >> + >> + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); >> +} >> + >> /* >> * GART >> * VMID 0 is the physical GPU addresses as used by the kernel. >> @@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, >> DRM_ERROR("Timeout waiting for VM flush ACK!\n"); >> } >> >> +/** >> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid >> + * >> + * @adev: amdgpu_device pointer >> + * @pasid: pasid to be flush >> + * >> + * Flush the TLB for the requested pasid. >> + */ >> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, >> + uint16_t pasid, uint32_t flush_type, >> + bool all_hub) >> +{ >> + int vmid, i; >> + signed long r; >> + uint32_t seq; >> + uint16_t queried_pasid; >> + bool ret; >> + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; >> + struct amdgpu_kiq *kiq = &adev->gfx.kiq; >> + >> + if (adev->in_gpu_reset) >> + return -EIO; >> + >> + if (ring->sched.ready) { >> + spin_lock(&adev->gfx.kiq.ring_lock); >> + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); > Same as above. > > Regards, > Felix > >> + kiq->pmf->kiq_invalidate_tlbs(ring, >> + pasid, flush_type, all_hub); >> + amdgpu_fence_emit_polling(ring, &seq); >> + amdgpu_ring_commit(ring); >> + spin_unlock(&adev->gfx.kiq.ring_lock); >> + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); >> + if (r < 1) { >> + DRM_ERROR("wait for kiq fence error: %ld.\n", r); >> + return -ETIME; >> + } >> + >> + return 0; >> + } >> + >> + for (vmid = 1; vmid < 16; vmid++) { >> + >> + ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid, >> + &queried_pasid); >> + if (ret && queried_pasid == pasid) { >> + if (all_hub) { >> + for (i = 0; i < adev->num_vmhubs; i++) >> + gmc_v9_0_flush_gpu_tlb(adev, vmid, >> + i, 0); >> + } else { >> + gmc_v9_0_flush_gpu_tlb(adev, vmid, >> + AMDGPU_GFXHUB_0, 0); >> + } >> + break; >> + } >> + } >> + >> + return 0; >> + >> +} >> + >> static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, >> unsigned vmid, uint64_t pd_addr) >> { >> @@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct >> amdgpu_device *adev, >> >> static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { >> .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, >> + .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, >> .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, >> .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, >> .map_mtype = gmc_v9_0_map_mtype, _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx