Am 2021-04-26 um 8:35 p.m. schrieb Zeng, Oak: > Regards, > Oak > > > > On 2021-04-21, 9:31 PM, "amd-gfx on behalf of Felix Kuehling" <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx on behalf of Felix.Kuehling@xxxxxxx> wrote: > > Use DMABufs with dynamic attachment to DMA-map GTT BOs on other GPUs. > > Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 + > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 76 ++++++++++++++++++- > 2 files changed, 77 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index 63668433f5a6..b706e5a54782 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -41,6 +41,7 @@ struct amdgpu_device; > enum kfd_mem_attachment_type { > KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */ > KFD_MEM_ATT_USERPTR, /* SG bo to DMA map pages from a userptr bo */ > + KFD_MEM_ATT_DMABUF, /* DMAbuf to DMA map TTM BOs */ > }; > > struct kfd_mem_attachment { > @@ -56,6 +57,7 @@ struct kfd_mem_attachment { > struct kgd_mem { > struct mutex lock; > struct amdgpu_bo *bo; > + struct dma_buf *dmabuf; > struct list_head attachments; > /* protected by amdkfd_process_info.lock */ > struct ttm_validate_buffer validate_list; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index 9eeedd0c7920..18a1f9222a59 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -524,6 +524,16 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem, > return ret; > } > > +static int > +kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment) > +{ > + struct ttm_operation_ctx ctx = {.interruptible = true}; > + struct amdgpu_bo *bo = attachment->bo_va->base.bo; > + > + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); > + return 
ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); > How does this work? The function name says this is dma mapping a buffer but from the implementation, it is just a placement and validation. Conceptually, calling ttm_bo_validate ensures that the BO is in the specified domain, in this case GTT. Before calling validate, it can be in the CPU domain, which means it may be swapped to disk so it's not GPU-accessible. For a DMABuf attachment, the CPU domain means that the DMABuf is not attached because the underlying memory object may be on the move or swapped out. The actual implementation of the dmabuf attachment is currently in amdgpu_ttm_populate/unpopulate. This is incorrect. Patch 10 in this series fixes that by moving the actual dmabuf attachment into amdgpu_ttm_backend_bind/unbind, which is called from amdgpu_bo_move when a BO is moved between the CPU and GTT domains. Regards, Felix > +} > + > static int > kfd_mem_dmamap_attachment(struct kgd_mem *mem, > struct kfd_mem_attachment *attachment) > @@ -533,6 +543,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem, > return 0; > case KFD_MEM_ATT_USERPTR: > return kfd_mem_dmamap_userptr(mem, attachment); > + case KFD_MEM_ATT_DMABUF: > + return kfd_mem_dmamap_dmabuf(attachment); > default: > WARN_ON_ONCE(1); > } > @@ -562,6 +574,19 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem, > ttm->sg = NULL; > } > > +static void > +kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment) > +{ > + struct ttm_operation_ctx ctx = {.interruptible = true}; > + struct amdgpu_bo *bo = attachment->bo_va->base.bo; > + > + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); > + ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); > + /* FIXME: This does not guarantee that amdgpu_ttm_tt_unpopulate is > + * called > + */ > +} > + > static void > kfd_mem_dmaunmap_attachment(struct kgd_mem *mem, > struct kfd_mem_attachment *attachment) > @@ -572,6 +597,9 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem, > case KFD_MEM_ATT_USERPTR: > 
kfd_mem_dmaunmap_userptr(mem, attachment); > break; > + case KFD_MEM_ATT_DMABUF: > + kfd_mem_dmaunmap_dmabuf(attachment); > + break; > default: > WARN_ON_ONCE(1); > } > @@ -605,6 +633,38 @@ kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem, > return 0; > } > > +static int > +kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem, > + struct amdgpu_bo **bo) > +{ > + struct drm_gem_object *gobj; > + > + if (!mem->dmabuf) { > + mem->dmabuf = amdgpu_gem_prime_export(&mem->bo->tbo.base, > + mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? > + DRM_RDWR : 0); > + if (IS_ERR(mem->dmabuf)) { > + mem->dmabuf = NULL; > + return PTR_ERR(mem->dmabuf); > + } > + } > + > + gobj = amdgpu_gem_prime_import(&adev->ddev, mem->dmabuf); > + if (IS_ERR(gobj)) > + return PTR_ERR(gobj); > + > + /* Import takes an extra reference on the dmabuf. Drop it now to > + * avoid leaking it. We only need the one reference in > + * kgd_mem->dmabuf. > + */ > + dma_buf_put(mem->dmabuf); > + > + *bo = gem_to_amdgpu_bo(gobj); > + (*bo)->parent = amdgpu_bo_ref(mem->bo); > + > + return 0; > +} > + > /* kfd_mem_attach - Add a BO to a VM > * > * Everything that needs to bo done only once when a BO is first added > @@ -662,8 +722,20 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, > ret = kfd_mem_attach_userptr(adev, mem, &bo[i]); > if (ret) > goto unwind; > + } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT && > + mem->bo->tbo.type != ttm_bo_type_sg) { > + /* GTT BOs use DMA-mapping ability of dynamic-attach > + * DMA bufs. TODO: The same should work for VRAM on > + * large-BAR GPUs. 
> + */ > + attachment[i]->type = KFD_MEM_ATT_DMABUF; > + ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]); > + if (ret) > + goto unwind; > } else { > - /* FIXME: Need to DMA-map other BO types */ > + /* FIXME: Need to DMA-map other BO types: > + * large-BAR VRAM, doorbells, MMIO remap > + */ > attachment[i]->type = KFD_MEM_ATT_SHARED; > bo[i] = mem->bo; > drm_gem_object_get(&bo[i]->tbo.base); > @@ -1522,6 +1594,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > > /* Free the BO*/ > drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv); > + if (mem->dmabuf) > + dma_buf_put(mem->dmabuf); > drm_gem_object_put(&mem->bo->tbo.base); > mutex_destroy(&mem->lock); > kfree(mem); > -- > 2.31.1 > > _______________________________________________ > amd-gfx mailing list > amd-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/amd-gfx > _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel