1. conditionally flush TLBs after map. 2. add heavy weight TLBs flush after unmap. Signed-off-by: Eric Huang <jinhuieric.huang@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 19 +++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 +++---- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 27 +++++++++++-------- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 6 ++--- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 7 ++--- 10 files changed, 46 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 2560977760b3..8f2d6711e12f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -280,7 +280,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, uint64_t *size); int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *flush_tlb); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1fcfa172911a..585b50b6009f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1117,7 +1117,8 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, static int update_gpuvm_pte(struct kgd_mem *mem, struct kfd_mem_attachment *entry, - struct amdgpu_sync *sync) + struct amdgpu_sync *sync, + bool *flush_tlb) { struct amdgpu_bo_va *bo_va = entry->bo_va; struct amdgpu_device *adev = entry->adev; @@ -1128,7 +1129,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, return ret; /* Update the page tables */ - ret = amdgpu_vm_bo_update(adev, bo_va, false); + ret = amdgpu_vm_bo_update(adev, bo_va, false, flush_tlb); if (ret) { pr_err("amdgpu_vm_bo_update failed\n"); return ret; @@ -1140,7 +1141,8 @@ static int update_gpuvm_pte(struct kgd_mem *mem, static int map_bo_to_gpuvm(struct kgd_mem *mem, struct kfd_mem_attachment *entry, struct amdgpu_sync *sync, - bool no_update_pte) + bool no_update_pte, + bool *flush_tlb) { int ret; @@ -1157,7 +1159,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem, if (no_update_pte) return 0; - ret = update_gpuvm_pte(mem, entry, sync); + ret = update_gpuvm_pte(mem, entry, sync, flush_tlb); if (ret) { pr_err("update_gpuvm_pte() failed\n"); goto update_gpuvm_pte_failed; @@ -1687,7 +1689,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv) + struct kgd_dev *kgd, struct kgd_mem *mem, + void *drm_priv, bool *flush_tlb) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); @@ -1775,7 +1778,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( entry->va, entry->va + bo_size, entry); ret = map_bo_to_gpuvm(mem, entry, ctx.sync, - is_invalid_userptr); + is_invalid_userptr, flush_tlb); if (ret) { pr_err("Failed to map bo to gpuvm\n"); goto out_unreserve; @@ -2469,7 +2472,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync); + ret = update_gpuvm_pte(mem, attachment, &sync, NULL); if (ret) { pr_err("%s: update PTE failed\n", __func__); /* make sure this gets validated again */ @@ -2675,7 +2678,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync_obj); + ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL); if (ret) { pr_debug("Memory eviction: update PTE failed. Try again\n"); goto validate_map_fail; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e9f9f462a652..e3df132e53a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -916,7 +916,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); + r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL); if (r) return r; @@ -927,7 +927,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { bo_va = fpriv->csa_va; BUG_ON(!bo_va); - r = amdgpu_vm_bo_update(adev, bo_va, false); + r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); if (r) return r; @@ -946,7 +946,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (bo_va == NULL) continue; - r = amdgpu_vm_bo_update(adev, bo_va, false); + r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 2120a87a949f..eac2fd0048cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -696,7 +696,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (operation == AMDGPU_VA_OP_MAP || operation == AMDGPU_VA_OP_REPLACE) { - r = amdgpu_vm_bo_update(adev, bo_va, false); + r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); if (r) goto error; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 2c20bba7dc1a..fed3d44b5ded 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1729,7 +1729,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->commit(¶ms, fence); if (table_freed) - *table_freed = params.table_freed; + *table_freed = *table_freed || params.table_freed; error_unlock: amdgpu_vm_eviction_unlock(vm); @@ -1793,7 +1793,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, * 0 for success, -EINVAL for failure. */ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear) + bool clear, bool *flush_tlb) { struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; @@ -1887,7 +1887,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, resv, mapping->start, mapping->last, update_flags, mapping->offset, mem, - pages_addr, last_update, NULL, + pages_addr, last_update, flush_tlb, vram_base_offset); if (r) return r; @@ -2141,7 +2141,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { /* Per VM BOs never need to bo cleared in the page tables */ - r = amdgpu_vm_bo_update(adev, bo_va, false); + r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); if (r) return r; } @@ -2160,7 +2160,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, else clear = true; - r = amdgpu_vm_bo_update(adev, bo_va, clear); + r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 67bba8462e7d..24a63e284a69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -419,7 +419,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear); + bool clear, bool *flush_tlb); bool amdgpu_vm_evictable(struct amdgpu_bo *bo); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo, bool evicted); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 960913a35ee4..136f77cadc2f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1574,6 +1574,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, long err = 0; int i; uint32_t *devices_arr = NULL; + bool flush_tlb = false; trace_kfd_map_memory_to_gpu_start(p); dev = kfd_device_by_id(GET_GPU_ID(args->handle)); @@ -1637,7 +1638,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, goto map_memory_to_gpu_failed; err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv); + peer->kgd, (struct kgd_mem *)mem, + peer_pdd->drm_priv, &flush_tlb); if (err) { pr_err("Failed to map to gpu %d/%d\n", i, args->n_devices); @@ -1658,16 +1660,18 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, } /* Flush TLBs after waiting for the page table updates to complete */ - for (i = 0; i < args->n_devices; i++) { - peer = kfd_device_by_id(devices_arr[i]); - if (WARN_ON_ONCE(!peer)) - continue; - peer_pdd = kfd_get_process_device_data(peer, p); - if (WARN_ON_ONCE(!peer_pdd)) - continue; - if (!amdgpu_read_lock(peer->ddev, true)) { - kfd_flush_tlb(peer_pdd); - amdgpu_read_unlock(peer->ddev); + if (flush_tlb) { + for (i = 0; i < args->n_devices; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (WARN_ON_ONCE(!peer)) + continue; + peer_pdd = kfd_get_process_device_data(peer, p); + if (WARN_ON_ONCE(!peer_pdd)) + continue; + if (!amdgpu_read_lock(peer->ddev, true)) { + kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); + amdgpu_read_unlock(peer->ddev); + } } } @@ -1766,6 +1770,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, amdgpu_read_unlock(peer->ddev); goto unmap_memory_from_gpu_failed; } + kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); amdgpu_read_unlock(peer->ddev); args->n_success = i+1; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 2bd621eee4e0..904b8178c1d7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -278,7 +278,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, qpd->vmid, qpd->page_table_base); /* invalidate the VM context after pasid and vmid mapping is set up */ - kfd_flush_tlb(qpd_to_pdd(qpd)); + kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); if (dqm->dev->kfd2kgd->set_scratch_backing_va) dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd, @@ -314,7 +314,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, if (flush_texture_cache_nocpsch(q->device, qpd)) pr_err("Failed to flush TC\n"); - kfd_flush_tlb(qpd_to_pdd(qpd)); + kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); /* Release the vmid mapping */ set_pasid_vmid_mapping(dqm, 0, qpd->vmid); @@ -885,7 +885,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, dqm->dev->kgd, qpd->vmid, qpd->page_table_base); - kfd_flush_tlb(pdd); + kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); } /* Take a safe reference to the mm_struct, which may otherwise diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index ecdd5e782b81..edce3ecf207d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1338,7 +1338,7 @@ void kfd_signal_reset_event(struct kfd_dev *dev); void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid); -void kfd_flush_tlb(struct kfd_process_device *pdd); +void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 3995002c582b..9708214116dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -689,7 +689,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, if (err) goto err_alloc_mem; - err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv); + err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, + pdd->drm_priv, NULL); if (err) goto err_map_mem; @@ -2159,7 +2160,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); } -void kfd_flush_tlb(struct kfd_process_device *pdd) +void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) { struct kfd_dev *dev = pdd->dev; @@ -2172,7 +2173,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd) pdd->qpd.vmid); } else { amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd, - pdd->process->pasid, TLB_FLUSH_LEGACY); + pdd->process->pasid, type); } } -- 2.25.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx