Add BO-type specific helper functions to DMA-map and unmap
kfd_mem_attachments. Implement this functionality for userptrs by
creating one SG BO per GPU and filling it with a DMA mapping of the
pages from the original mem->bo.

Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   8 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 155 ++++++++++++++++--
 2 files changed, 152 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 910c50956e16..fc3514ed1b74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -38,11 +38,17 @@ extern uint64_t amdgpu_amdkfd_total_mem_size;
 
 struct amdgpu_device;
 
+enum kfd_mem_attachment_type {
+	KFD_MEM_ATT_SHARED,	/* Share kgd_mem->bo or another attachment's */
+	KFD_MEM_ATT_USERPTR,	/* SG bo to DMA map pages from a userptr bo */
+};
+
 struct kfd_mem_attachment {
 	struct list_head list;
+	enum kfd_mem_attachment_type type;
+	bool is_mapped;
 	struct amdgpu_bo_va *bo_va;
 	struct amdgpu_device *adev;
-	bool is_mapped;
 	uint64_t va;
 	uint64_t pte_flags;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 114fbf508707..51502a07fc1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -471,12 +471,117 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
 	return pte_flags;
 }
 
+static int
+kfd_mem_dmamap_userptr(struct kgd_mem *mem,
+		       struct kfd_mem_attachment *attachment)
+{
+	enum dma_data_direction direction =
+		mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+	struct ttm_operation_ctx ctx = {.interruptible = true};
+	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+	struct amdgpu_device *adev = attachment->adev;
+	struct ttm_tt *src_ttm = mem->bo->tbo.ttm;
+	struct ttm_tt *ttm = bo->tbo.ttm;
+	int ret;
+
+	ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
+	if (unlikely(!ttm->sg))
+		return -ENOMEM;
+
+	if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
+		return -EINVAL;
+
+	/* Same sequence as in amdgpu_ttm_tt_pin_userptr */
+	ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
+					ttm->num_pages, 0,
+					(u64)ttm->num_pages << PAGE_SHIFT,
+					GFP_KERNEL);
+	if (unlikely(ret))
+		goto release_sg;
+
+	ret = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
+	if (unlikely(ret))
+		goto release_sg;
+
+	drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address,
+				       ttm->num_pages);
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (ret)
+		goto release_sg;
+
+	return 0;
+
+release_sg:
+	pr_err("DMA map userptr failed: %d\n", ret);
+	sg_free_table(ttm->sg);
+	kfree(ttm->sg);
+	ttm->sg = NULL;
+	return ret;
+}
+
+static int
+kfd_mem_dmamap_attachment(struct kgd_mem *mem,
+			  struct kfd_mem_attachment *attachment)
+{
+	switch (attachment->type) {
+	case KFD_MEM_ATT_SHARED:
+		return 0;
+	case KFD_MEM_ATT_USERPTR:
+		return kfd_mem_dmamap_userptr(mem, attachment);
+	default:
+		WARN_ON_ONCE(1);
+	}
+	return -EINVAL;
+}
+
+static void
+kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
+			 struct kfd_mem_attachment *attachment)
+{
+	enum dma_data_direction direction =
+		mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+	struct ttm_operation_ctx ctx = {.interruptible = false};
+	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+	struct amdgpu_device *adev = attachment->adev;
+	struct ttm_tt *ttm = bo->tbo.ttm;
+
+	if (unlikely(!ttm->sg))
+		return;
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+	ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+
+	dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
+	sg_free_table(ttm->sg);
+	ttm->sg = NULL;
+}
+
+static void
+kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
+			    struct kfd_mem_attachment *attachment)
+{
+	switch (attachment->type) {
+	case KFD_MEM_ATT_SHARED:
+		break;
+	case KFD_MEM_ATT_USERPTR:
+		kfd_mem_dmaunmap_userptr(mem, attachment);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+	}
+}
+
 /* kfd_mem_attach - Add a BO to a VM
  *
  * Everything that needs to bo done only once when a BO is first added
  * to a VM. It can later be mapped and unmapped many times without
  * repeating these steps.
  *
+ * 0. Create BO for DMA mapping, if needed
  * 1. Allocate and initialize BO VA entry data structure
  * 2. Add BO to the VM
  * 3. Determine ASIC-specific PTE flags
@@ -486,10 +591,12 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
 static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 		struct amdgpu_vm *vm, bool is_aql)
 {
+	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
 	unsigned long bo_size = mem->bo->tbo.base.size;
 	uint64_t va = mem->va;
 	struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
 	struct amdgpu_bo *bo[2] = {NULL, NULL};
+	struct drm_gem_object *gobj;
 	int i, ret;
 
 	if (!va) {
@@ -507,14 +614,36 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 		pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
 			 va + bo_size, vm);
 
-		/* FIXME: For now all attachments use the same BO. This is
-		 * incorrect because one BO can only have one DMA mapping
-		 * for one GPU. We need one BO per GPU, e.g. a DMABuf
-		 * import with dynamic attachment. This will be addressed
-		 * one BO-type at a time in subsequent patches.
-		 */
-		bo[i] = mem->bo;
-		drm_gem_object_get(&bo[i]->tbo.base);
+		if (adev == bo_adev || (mem->domain == AMDGPU_GEM_DOMAIN_VRAM &&
+					amdgpu_xgmi_same_hive(adev, bo_adev))) {
+			/* Mappings on the local GPU and VRAM mappings in the
+			 * local hive share the original BO
+			 */
+			attachment[i]->type = KFD_MEM_ATT_SHARED;
+			bo[i] = mem->bo;
+			drm_gem_object_get(&bo[i]->tbo.base);
+		} else if (i > 0) {
+			/* Multiple mappings on the same GPU share the BO */
+			attachment[i]->type = KFD_MEM_ATT_SHARED;
+			bo[i] = bo[0];
+			drm_gem_object_get(&bo[i]->tbo.base);
+		} else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
+			/* Create an SG BO to DMA-map userptrs on other GPUs */
+			attachment[i]->type = KFD_MEM_ATT_USERPTR;
+			ret = amdgpu_gem_object_create(adev, bo_size, 1,
+						       AMDGPU_GEM_DOMAIN_CPU,
+						       0, ttm_bo_type_sg,
+						       mem->bo->tbo.base.resv,
+						       &gobj);
+			if (ret)
+				goto unwind;
+			bo[i] = gem_to_amdgpu_bo(gobj);
+		} else {
+			/* FIXME: Need to DMA-map other BO types */
+			attachment[i]->type = KFD_MEM_ATT_SHARED;
+			bo[i] = mem->bo;
+			drm_gem_object_get(&bo[i]->tbo.base);
+		}
 
 		/* Add BO to VM internal data structures */
 		attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
@@ -557,13 +686,19 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 	return ret;
 }
 
-static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
+static void
+kfd_mem_detach(struct kgd_mem *mem, struct kfd_mem_attachment *attachment)
 {
 	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
 
 	pr_debug("\t remove VA 0x%llx in entry %p\n",
 			attachment->va, attachment);
 	amdgpu_vm_bo_rmv(attachment->adev, attachment->bo_va);
+	/* FIXME: For some reason SG BOs don't get individualized. Do this
+	 * now manually. This is probably not the right place to do this.
+	 */
+	if (bo != mem->bo)
+		bo->tbo.base.resv = &bo->tbo.base._resv;
 	drm_gem_object_put(&bo->tbo.base);
 	list_del(&attachment->list);
 	kfree(attachment);
@@ -1376,7 +1511,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 
 	/* Remove from VM internal data structures */
 	list_for_each_entry_safe(entry, tmp, &mem->attachments, list)
-		kfd_mem_detach(entry);
+		kfd_mem_detach(mem, entry);
 
 	ret = unreserve_bo_and_vms(&ctx, false, false);
-- 
2.31.1
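
For readers less familiar with the scatter-gather DMA API these helpers
build on, below is a minimal standalone sketch of the map/unmap pairing
that kfd_mem_dmamap_userptr() and kfd_mem_dmaunmap_userptr() perform on
ttm->sg. It is not part of the patch: the sketch_* function names and
the bare device/page-array parameters are invented for illustration,
and the real code additionally validates the SG BO into GTT, fills
ttm->dma_address, and derives the DMA direction from the BO's alloc
flags.

/*
 * Standalone sketch (not from the patch): map an array of pinned pages
 * for device access through an sg_table, and tear the mapping down
 * again. Error handling mirrors the map order in reverse.
 */
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/err.h>

static struct sg_table *sketch_dma_map_pages(struct device *dev,
					     struct page **pages,
					     unsigned int npages,
					     enum dma_data_direction dir)
{
	struct sg_table *sgt;
	int ret;

	sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
	if (!sgt)
		return ERR_PTR(-ENOMEM);

	/* Build one scatterlist covering the whole page array */
	ret = sg_alloc_table_from_pages(sgt, pages, npages, 0,
					(u64)npages << PAGE_SHIFT,
					GFP_KERNEL);
	if (ret)
		goto free_sgt;

	/* Create the device-visible DMA addresses */
	ret = dma_map_sgtable(dev, sgt, dir, 0);
	if (ret)
		goto free_table;

	return sgt;

free_table:
	sg_free_table(sgt);
free_sgt:
	kfree(sgt);
	return ERR_PTR(ret);
}

static void sketch_dma_unmap_pages(struct device *dev, struct sg_table *sgt,
				   enum dma_data_direction dir)
{
	/* Must mirror the map side: unmap before freeing the table */
	dma_unmap_sgtable(dev, sgt, dir, 0);
	sg_free_table(sgt);
	kfree(sgt);
}

The invariant the patch follows is the same as in this sketch:
dma_unmap_sgtable() is called with the same device and direction as the
matching dma_map_sgtable() before the sg_table is freed, and a failed
map path releases the table and the allocation in reverse order.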