From: Philip Yang <Philip.Yang@xxxxxxx>

Use amdgpu_vm_bo_update_mapping to update the GPU page table to map or
unmap svm range system memory pages to GPUs.

Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
Signed-off-by: Alex Sierra <alex.sierra@xxxxxxx>
Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>
---
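Notes (after the "---", so not part of the commit message): both the map
and unmap paths below funnel into amdgpu_vm_bo_update_mapping(). For
reviewers, a simplified side-by-side of the two call sites; the parameter
annotations are my reading of the current signature, not comments taken
from the amdgpu source:

	/* unmap: clear the PTEs; called "unlocked" so it is usable from
	 * the MMU notifier path, where the VM cannot be reserved
	 */
	amdgpu_vm_bo_update_mapping(adev, adev, vm,
				    false /* immediate */, true /* unlocked */,
				    NULL /* resv */, start, last,
				    0 /* init_pte_value: invalid PTE */,
				    0 /* offset */, NULL /* nodes */,
				    NULL /* no pages_addr */, fence);

	/* map: write valid system-memory PTEs while the VM root BO is
	 * reserved, using the page addresses gathered into pages_addr
	 */
	amdgpu_vm_bo_update_mapping(adev, adev, vm,
				    false /* immediate */, false /* unlocked */,
				    NULL /* resv */, start, last,
				    pte_flags, 0 /* offset */, NULL /* nodes */,
				    pages_addr, &vm->last_update);
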
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 232 ++++++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   2 +
 2 files changed, 233 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 55500ec4972f..3c4a036609c4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -534,6 +534,229 @@ svm_range_split_add_front(struct svm_range *prange, struct svm_range *new,
 	return 0;
 }
 
+static uint64_t
+svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
+{
+	uint32_t flags = prange->flags;
+	uint32_t mapping_flags;
+	uint64_t pte_flags;
+
+	pte_flags = AMDGPU_PTE_VALID;
+	pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
+
+	mapping_flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
+
+	if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO)
+		mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE;
+	if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC)
+		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+	if (flags & KFD_IOCTL_SVM_FLAG_COHERENT)
+		mapping_flags |= AMDGPU_VM_MTYPE_UC;
+	else
+		mapping_flags |= AMDGPU_VM_MTYPE_NC;
+
+	/* TODO: add CHIP_ARCTURUS new flags for vram mapping */
+
+	pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
+
+	/* Apply ASIC specific mapping flags */
+	amdgpu_gmc_get_vm_pte(adev, &prange->mapping, &pte_flags);
+
+	pr_debug("PTE flags 0x%llx\n", pte_flags);
+
+	return pte_flags;
+}
+
+static int
+svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			 struct svm_range *prange, struct dma_fence **fence)
+{
+	uint64_t init_pte_value = 0;
+	uint64_t start;
+	uint64_t last;
+
+	start = prange->it_node.start;
+	last = prange->it_node.last;
+
+	pr_debug("svms 0x%p [0x%llx 0x%llx]\n", prange->svms, start, last);
+
+	return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
+					   start, last, init_pte_value, 0,
+					   NULL, NULL, fence);
+}
+
+static int
+svm_range_unmap_from_gpus(struct svm_range *prange)
+{
+	DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+	struct kfd_process_device *pdd;
+	struct dma_fence *fence = NULL;
+	struct amdgpu_device *adev;
+	struct kfd_process *p;
+	struct kfd_dev *dev;
+	uint32_t gpuidx;
+	int r = 0;
+
+	bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+		  MAX_GPU_INSTANCE);
+	p = container_of(prange->svms, struct kfd_process, svms);
+
+	for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+		pr_debug("unmap from gpu idx 0x%x\n", gpuidx);
+		r = kfd_process_device_from_gpuidx(p, gpuidx, &dev);
+		if (r) {
+			pr_debug("failed to find device idx %d\n", gpuidx);
+			return -EINVAL;
+		}
+
+		pdd = kfd_bind_process_to_device(dev, p);
+		if (IS_ERR(pdd))
+			return -EINVAL;
+
+		adev = (struct amdgpu_device *)dev->kgd;
+
+		r = svm_range_unmap_from_gpu(adev, pdd->vm, prange, &fence);
+		if (r)
+			break;
+
+		if (fence) {
+			r = dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+			fence = NULL;
+			if (r)
+				break;
+		}
+
+		amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
+						  p->pasid);
+	}
+
+	return r;
+}
+
+static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo)
+{
+	struct ttm_operation_ctx ctx = { false, false };
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+
+	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+static int
+svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		     struct svm_range *prange, bool reserve_vm,
+		     struct dma_fence **fence)
+{
+	struct amdgpu_bo *root;
+	dma_addr_t *pages_addr;
+	uint64_t pte_flags;
+	int r = 0;
+
+	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
+		 prange->it_node.start, prange->it_node.last);
+
+	if (reserve_vm) {
+		root = amdgpu_bo_ref(vm->root.base.bo);
+		r = amdgpu_bo_reserve(root, true);
+		if (r) {
+			pr_debug("failed %d to reserve root bo\n", r);
+			amdgpu_bo_unref(&root);
+			goto out;
+		}
+		r = amdgpu_vm_validate_pt_bos(adev, vm, svm_range_bo_validate,
+					      NULL);
+		if (r) {
+			pr_debug("failed %d validate pt bos\n", r);
+			goto unreserve_out;
+		}
+	}
+
+	prange->mapping.start = prange->it_node.start;
+	prange->mapping.last = prange->it_node.last;
+	prange->mapping.offset = 0;
+	pte_flags = svm_range_get_pte_flags(adev, prange);
+	prange->mapping.flags = pte_flags;
+	pages_addr = prange->pages_addr;
+
+	r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, NULL,
+					prange->mapping.start,
+					prange->mapping.last, pte_flags,
+					prange->mapping.offset, NULL,
+					pages_addr, &vm->last_update);
+	if (r) {
+		pr_debug("failed %d to map to gpu 0x%lx\n", r,
+			 prange->it_node.start);
+		goto unreserve_out;
+	}
+
+
+	r = amdgpu_vm_update_pdes(adev, vm, false);
+	if (r) {
+		pr_debug("failed %d to update directories 0x%lx\n", r,
+			 prange->it_node.start);
+		goto unreserve_out;
+	}
+
+	if (fence)
+		*fence = dma_fence_get(vm->last_update);
+
+unreserve_out:
+	if (reserve_vm) {
+		amdgpu_bo_unreserve(root);
+		amdgpu_bo_unref(&root);
+	}
+
+out:
+	return r;
+}
+
+static int svm_range_map_to_gpus(struct svm_range *prange, bool reserve_vm)
+{
+	DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+	struct kfd_process_device *pdd;
+	struct amdgpu_device *adev;
+	struct kfd_process *p;
+	struct kfd_dev *dev;
+	struct dma_fence *fence = NULL;
+	uint32_t gpuidx;
+	int r = 0;
+
+	bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+		  MAX_GPU_INSTANCE);
+	p = container_of(prange->svms, struct kfd_process, svms);
+
+	for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+		r = kfd_process_device_from_gpuidx(p, gpuidx, &dev);
+		if (r) {
+			pr_debug("failed to find device idx %d\n", gpuidx);
+			return -EINVAL;
+		}
+
+		pdd = kfd_bind_process_to_device(dev, p);
+		if (IS_ERR(pdd))
+			return -EINVAL;
+		adev = (struct amdgpu_device *)dev->kgd;
+
+		r = svm_range_map_to_gpu(adev, pdd->vm, prange, reserve_vm,
+					 &fence);
+		if (r)
+			break;
+
+		if (fence) {
+			r = dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+			fence = NULL;
+			if (r) {
+				pr_debug("failed %d to dma fence wait\n", r);
+				break;
+			}
+		}
+	}
+
+	return r;
+}
+
 struct svm_range *svm_range_clone(struct svm_range *old)
 {
 	struct svm_range *new;
@@ -750,6 +973,7 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, unsigned long start,
 	 */
 	list_for_each_entry_safe(prange, tmp, &update_list, update_list) {
 		list_del(&prange->list);
+		svm_range_unmap_from_gpus(prange);
 		mutex_lock(&svms->free_list_lock);
 		list_add_tail(&prange->remove_list, &svms->free_list);
 		mutex_unlock(&svms->free_list_lock);
@@ -991,8 +1215,14 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
 	}
 
 	r = svm_range_validate(mm, prange);
-	if (r)
+	if (r) {
 		pr_debug("failed %d to validate svm range\n", r);
+		goto out_unlock;
+	}
+
+	r = svm_range_map_to_gpus(prange, true);
+	if (r)
+		pr_debug("failed %d to map svm range\n", r);
 
 out_unlock:
 	if (r) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 4d394f72eefc..fb68b5ee54f8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -42,6 +42,7 @@
  * @update_list:link list node used to add to update_list
  * @remove_list:link list node used to add to remove list
  * @hmm_range: hmm range structure used by hmm_range_fault to get system pages
+ * @mapping: bo_va mapping structure to create and update GPU page table
  * @npages: number of pages
  * @pages_addr: list of system memory physical page address
  * @flags: flags defined as KFD_IOCTL_SVM_FLAG_*
@@ -63,6 +64,7 @@ struct svm_range {
 	struct list_head update_list;
 	struct list_head remove_list;
 	struct hmm_range *hmm_range;
+	struct amdgpu_bo_va_mapping mapping;
 	uint64_t npages;
 	dma_addr_t *pages_addr;
 	uint32_t flags;
-- 
2.29.2
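
For reference, the call flow this patch establishes, simplified from the
hunks above (the svm_range_validate/hmm_range_fault step comes from the
struct documentation and earlier patches in this series, not this diff):

	/* ioctl path: validate pages, then map on every GPU set in the
	 * access/access-in-place bitmaps
	 */
	svm_range_set_attr()
		svm_range_validate()              /* fills prange->pages_addr */
		svm_range_map_to_gpus(prange, true)
			svm_range_map_to_gpu()    /* amdgpu_vm_bo_update_mapping */

	/* MMU notifier path: tear down GPU mappings before the range is
	 * freed, then flush each GPU's TLB by PASID
	 */
	svm_range_unmap_from_cpu()
		svm_range_unmap_from_gpus(prange)
			svm_range_unmap_from_gpu()          /* clear PTEs */
			amdgpu_amdkfd_flush_gpu_tlb_pasid()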