Am 09.08.19 um 06:21 schrieb Zeng, Oak: > This is for kfd to reuse amdgpu TLB invalidation function. > On gfx10, kfd only needs to flush TLB on gfx hub but not > on mm hub. So export a function for KFD flush TLB only on > specific hub. > > Change-Id: I58ff00969f88438cfd3dc7e9deb7bff0c1bb4133 > Signed-off-by: Oak Zeng <Oak.Zeng@xxxxxxx> Reviewed-by: Christian König <christian.koenig@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 12 ++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 9 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 +- > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 5 +- > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 16 ++++-- > drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 6 +- > drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 6 +- > drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 6 +- > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 69 ++++++++++++----------- > 9 files changed, 78 insertions(+), 57 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > index 2695925..741a3c5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > @@ -670,7 +670,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, > int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) > { > struct amdgpu_device *adev = (struct amdgpu_device *) kgd; > - int vmid; > + int vmid, i; > struct amdgpu_ring *ring = &adev->gfx.kiq.ring; > uint32_t flush_type = 0; > > @@ -689,8 +689,9 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) > if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { > if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid) > == pasid) { > - amdgpu_gmc_flush_gpu_tlb(adev, vmid, > - flush_type); > + for (i = 0; i < adev->num_vmhubs; i++) > + amdgpu_gmc_flush_gpu_tlb(adev, vmid, > + i, flush_type); > break; > } > } > @@ -702,6 +703,7 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) > int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) > { > struct amdgpu_device *adev = (struct amdgpu_device *) kgd; > + int i; > > if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { > pr_err("non kfd vmid %d\n", vmid); > @@ -723,7 +725,9 @@ int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) > * TODO 2: support range-based invalidation, requires kfg2kgd > * interface change > */ > - amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); > + for (i = 0; i < adev->num_vmhubs; i++) > + amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); > + > return 0; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c > index 6d11e17..a67ffff 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c > @@ -248,7 +248,9 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, > } > mb(); > amdgpu_asic_flush_hdp(adev, NULL); > - amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); > + for (i = 0; i < adev->num_vmhubs; i++) > + amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); > + > return 0; > } > > @@ -309,7 +311,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, > #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS > unsigned i,t,p; > #endif > - int r; > + int r, i; > > if (!adev->gart.ready) { > WARN(1, "trying to bind memory to uninitialized GART !\n"); > @@ -333,7 +335,8 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, > > mb(); > amdgpu_asic_flush_hdp(adev, NULL); > - amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); > + for (i = 0; i < adev->num_vmhubs; i++) > + amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); > return 0; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > index 071145a..8f2699e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > @@ -89,8 +89,8 @@ struct amdgpu_vmhub { > */ > struct amdgpu_gmc_funcs { > /* flush the vm tlb via mmio */ > - void (*flush_gpu_tlb)(struct amdgpu_device *adev, > - uint32_t vmid, uint32_t flush_type); > + void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, > + uint32_t vmhub, uint32_t flush_type); > /* flush the vm tlb via ring */ > uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, > uint64_t pd_addr); > @@ -180,7 +180,7 @@ struct amdgpu_gmc { > struct ras_common_if *ras_if; > }; > > -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type)) > +#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) > #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) > #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) > #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > index 9db4c4b..1adde85 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > @@ -1735,9 +1735,12 @@ static void gfx_v10_0_init_csb(struct amdgpu_device *adev) > > static void gfx_v10_0_init_pg(struct amdgpu_device *adev) > { > + int i; > + > gfx_v10_0_init_csb(adev); > > - amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); > + for (i = 0; i < adev->num_vmhubs; i++) > + amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); > > /* TODO: init power gating */ > return; > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > index 4e3ac10..b8afb12 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > @@ -229,8 +229,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, > * > * Flush the TLB for the requested page table. > */ > -static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, > - uint32_t vmid, uint32_t flush_type) > +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > + uint32_t vmhub, uint32_t flush_type) > { > struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; > struct dma_fence *fence; > @@ -243,7 +243,14 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, > > mutex_lock(&adev->mman.gtt_window_lock); > > - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0); > + if (vmhub == AMDGPU_MMHUB_0) { > + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0); > + mutex_unlock(&adev->mman.gtt_window_lock); > + return; > + } > + > + BUG_ON(vmhub != AMDGPU_GFXHUB_0); > + > if (!adev->mman.buffer_funcs_enabled || > !adev->ib_pool_ready || > adev->asic_type > CHIP_NAVI14 || > @@ -773,7 +780,8 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) > > gfxhub_v2_0_set_fault_enable_default(adev, value); > mmhub_v2_0_set_fault_enable_default(adev, value); > - gmc_v10_0_flush_gpu_tlb(adev, 0, 0); > + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); > + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); > > DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", > (unsigned)(adev->gmc.gart_size >> 20), > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > index b06d876..564fb1c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > @@ -359,8 +359,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) > return 0; > } > > -static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, > - uint32_t vmid, uint32_t flush_type) > +static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > + uint32_t vmhub, uint32_t flush_type) > { > WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); > } > @@ -568,7 +568,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) > else > gmc_v6_0_set_fault_enable_default(adev, true); > > - gmc_v6_0_flush_gpu_tlb(adev, 0, 0); > + gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0); > dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", > (unsigned)(adev->gmc.gart_size >> 20), > (unsigned long long)table_addr); > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > index 75aa333..9e6a233 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > @@ -430,8 +430,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) > * > * Flush the TLB for the requested page table (CIK). > */ > -static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, > - uint32_t vmid, uint32_t flush_type) > +static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > + uint32_t vmhub, uint32_t flush_type) > { > /* bits 0-15 are the VM contexts0-15 */ > WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); > @@ -674,7 +674,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) > WREG32(mmCHUB_CONTROL, tmp); > } > > - gmc_v7_0_flush_gpu_tlb(adev, 0, 0); > + gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0); > DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", > (unsigned)(adev->gmc.gart_size >> 20), > (unsigned long long)table_addr); > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > index 8bf2ba3..f7d6a07 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > @@ -632,8 +632,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) > * > * Flush the TLB for the requested page table (VI). > */ > -static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, > - uint32_t vmid, uint32_t flush_type) > +static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > + uint32_t vmhub, uint32_t flush_type) > { > /* bits 0-15 are the VM contexts0-15 */ > WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); > @@ -918,7 +918,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) > else > gmc_v8_0_set_fault_enable_default(adev, true); > > - gmc_v8_0_flush_gpu_tlb(adev, 0, 0); > + gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0); > DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", > (unsigned)(adev->gmc.gart_size >> 20), > (unsigned long long)table_addr); > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index a2aa35e..f862366 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -448,44 +448,45 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, > * > * Flush the TLB for the requested page table using certain type. > */ > -static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, > - uint32_t vmid, uint32_t flush_type) > +static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > + uint32_t vmhub, uint32_t flush_type) > { > const unsigned eng = 17; > - unsigned i, j; > + u32 j, tmp; > + struct amdgpu_vmhub *hub; > > - for (i = 0; i < adev->num_vmhubs; ++i) { > - struct amdgpu_vmhub *hub = &adev->vmhub[i]; > - u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); > + BUG_ON(vmhub >= adev->num_vmhubs); > > - /* This is necessary for a HW workaround under SRIOV as well > - * as GFXOFF under bare metal > - */ > - if (adev->gfx.kiq.ring.sched.ready && > - (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && > - !adev->in_gpu_reset) { > - uint32_t req = hub->vm_inv_eng0_req + eng; > - uint32_t ack = hub->vm_inv_eng0_ack + eng; > - > - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, > - 1 << vmid); > - continue; > - } > + hub = &adev->vmhub[vmhub]; > + tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); > > - spin_lock(&adev->gmc.invalidate_lock); > - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); > - for (j = 0; j < adev->usec_timeout; j++) { > - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); > - if (tmp & (1 << vmid)) > - break; > - udelay(1); > - } > - spin_unlock(&adev->gmc.invalidate_lock); > - if (j < adev->usec_timeout) > - continue; > + /* This is necessary for a HW workaround under SRIOV as well > + * as GFXOFF under bare metal > + */ > + if (adev->gfx.kiq.ring.sched.ready && > + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && > + !adev->in_gpu_reset) { > + uint32_t req = hub->vm_inv_eng0_req + eng; > + uint32_t ack = hub->vm_inv_eng0_ack + eng; > + > + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, > + 1 << vmid); > + return; > + } > > - DRM_ERROR("Timeout waiting for VM flush ACK!\n"); > + spin_lock(&adev->gmc.invalidate_lock); > + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); > + for (j = 0; j < adev->usec_timeout; j++) { > + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); > + if (tmp & (1 << vmid)) > + break; > + udelay(1); > } > + spin_unlock(&adev->gmc.invalidate_lock); > + if (j < adev->usec_timeout) > + return; > + > + DRM_ERROR("Timeout waiting for VM flush ACK!\n"); > } > > static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, > @@ -1239,7 +1240,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) > */ > static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) > { > - int r; > + int r, i; > bool value; > u32 tmp; > > @@ -1295,7 +1296,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) > mmhub_v9_4_set_fault_enable_default(adev, value); > else > mmhub_v1_0_set_fault_enable_default(adev, value); > - gmc_v9_0_flush_gpu_tlb(adev, 0, 0); > + > + for (i = 0; i < adev->num_vmhubs; ++i) > + gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); > > DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", > (unsigned)(adev->gmc.gart_size >> 20), _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx