This will make it possible for amdgpu GEM ioctls to flush TLBs on
compute VMs.

This removes VMID-based TLB flushing and always uses PASID-based
flushing. PASID-based flushing still works because it scans the
VMID-PASID mapping registers to find the right VMID. It's only
slightly less efficient. The VMID-based path was only used without
HWS, which is not a production use case.

Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 30 +++++++++-------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  6 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h      |  9 +++++--
 drivers/gpu/drm/amd/amdkfd/kfd_process.c   | 28 --------------------
 5 files changed, 22 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 8816853e50c0..dcbd28e99b5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -726,31 +726,25 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
 	return false;
 }
 
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-				     uint16_t vmid)
-{
-	if (adev->family == AMDGPU_FAMILY_AI) {
-		int i;
-
-		for (i = 0; i < adev->num_vmhubs; i++)
-			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
-	} else {
-		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
-	}
-
-	return 0;
-}
-
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-				      uint16_t pasid, enum TLB_FLUSH_TYPE flush_type)
+int amdgpu_amdkfd_flush_tlb(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			    enum TLB_FLUSH_TYPE type)
 {
+	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
 	bool all_hub = false;
 
+	/*
+	 * It can be that we race and lose here, but that is extremely unlikely
+	 * and the worst thing which could happen is that we flush the changes
+	 * into the TLB once more which is harmless.
+	 */
+	if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
+		return 0;
+
 	if (adev->family == AMDGPU_FAMILY_AI ||
 	    adev->family == AMDGPU_FAMILY_RV)
 		all_hub = true;
 
-	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, type, all_hub);
 }
 
 bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 01ba3589b60a..dcaf69fd833c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -157,10 +157,8 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
 			uint32_t *ib_cmd, uint32_t ib_len);
 void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle);
 bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-				uint16_t vmid);
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-				uint16_t pasid, enum TLB_FLUSH_TYPE flush_type);
+int amdgpu_amdkfd_flush_tlb(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			    enum TLB_FLUSH_TYPE type);
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 03a3314e5b43..171de7da2959 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -290,6 +290,7 @@ struct amdgpu_vm {
 	/* Last finished delayed update */
 	atomic64_t		tlb_seq;
 	struct dma_fence	*last_tlb_flush;
+	atomic64_t		kfd_last_flushed_seq;
 
 	/* Last unlocked submission to the scheduler entities */
 	struct dma_fence	*last_unlocked;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index bfa30d12406b..e029129308e7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -715,7 +715,6 @@ struct kfd_process_device {
 	/* VM context for GPUVM allocations */
 	struct file *drm_file;
 	void *drm_priv;
-	atomic64_t tlb_seq;
 
 	/* GPUVM allocations storage */
 	struct idr alloc_idr;
@@ -1344,7 +1343,13 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
 
 void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
 
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
+static inline void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
+{
+	struct amdgpu_device *adev = pdd->dev->adev;
+	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+
+	amdgpu_amdkfd_flush_tlb(adev, vm, type);
+}
 
 static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index ebabe92f7edb..48d7d30eeb24 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1591,7 +1591,6 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 		return ret;
 	}
 	pdd->drm_priv = drm_file->private_data;
-	atomic64_set(&pdd->tlb_seq, 0);
 
 	ret = kfd_process_device_reserve_ib_mem(pdd);
 	if (ret)
@@ -1994,33 +1993,6 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
 			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
 }
 
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
-{
-	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
-	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
-	struct kfd_dev *dev = pdd->dev;
-
-	/*
-	 * It can be that we race and lose here, but that is extremely unlikely
-	 * and the worst thing which could happen is that we flush the changes
-	 * into the TLB once more which is harmless.
-	 */
-	if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
-		return;
-
-	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
-		/* Nothing to flush until a VMID is assigned, which
-		 * only happens when the first queue is created.
-		 */
-		if (pdd->qpd.vmid)
-			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
-							 pdd->qpd.vmid);
-	} else {
-		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev,
-						  pdd->process->pasid, type);
-	}
-}
-
 struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, uint32_t gpu_id)
 {
 	int i;
-- 
2.34.1
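
As background for reviewers: the atomic64_xchg() check in
amdgpu_amdkfd_flush_tlb() implements a flush-deduplication scheme keyed
on the VM's TLB sequence number, and tolerates the race described in
the code comment. Below is a minimal standalone sketch of that scheme,
with C11 stdatomic standing in for the kernel's atomic64_* helpers.
Every identifier in the sketch is made up for illustration; none of it
is part of this patch or of the driver.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t tlb_seq;           /* bumped on every page-table update */
static _Atomic uint64_t last_flushed_seq;  /* plays the role of vm->kfd_last_flushed_seq */

/* Returns 1 if a flush was issued, 0 if it was skipped as redundant. */
static int flush_tlb(void)
{
	uint64_t seq = atomic_load(&tlb_seq);

	/*
	 * Swap in the sequence number we are about to flush for. If that
	 * value was already recorded, everything up to it has been flushed.
	 * Two racing callers may both see a stale value and flush twice;
	 * that costs one redundant flush, never a missed one.
	 */
	if (atomic_exchange(&last_flushed_seq, seq) == seq)
		return 0;

	printf("flushing TLB for seq %llu\n", (unsigned long long)seq);
	return 1;
}

int main(void)
{
	atomic_fetch_add(&tlb_seq, 1);	/* a page-table update happened */
	flush_tlb();			/* issues a flush */
	flush_tlb();			/* skipped: nothing new since last flush */
	atomic_fetch_add(&tlb_seq, 1);	/* another update */
	flush_tlb();			/* flushes again */
	return 0;
}

Built with e.g. "gcc -std=c11 -o sketch sketch.c", this prints two
"flushing TLB" lines and silently skips the middle call, which is the
saving the xchg comparison buys in the driver.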