Re: [PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling

Felix Kuehling <felix.kuehling@xxxxxxx> · Mon, 25 Jul 2022 21:48:51 -0400

Am 2022-07-25 um 20:40 schrieb Lang Yu:
On 07/25/ , Felix Kuehling wrote:
Am 2022-07-25 um 20:15 schrieb Lang Yu:
On 07/25/ , Felix Kuehling wrote:
Am 2022-07-25 um 06:32 schrieb Lang Yu:
We have memory leak issue in current implenmention, i.e.,
the allocated struct kgd_mem memory is not handled properly.

The idea is always creating a buffer object when importing
a gfx BO based dmabuf.

Signed-off-by: Lang Yu <Lang.Yu@xxxxxxx>
---
    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 99 +++++++++++++------
    1 file changed, 70 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index b3806ebe5ef7..c1855b72a3f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -225,7 +225,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
    	u32 alloc_flags = bo->kfd_bo->alloc_flags;
    	u64 size = amdgpu_bo_size(bo);
-	unreserve_mem_limit(adev, size, alloc_flags);
+	if (!bo->kfd_bo->is_imported)
+		unreserve_mem_limit(adev, size, alloc_flags);
    	kfree(bo->kfd_bo);
    }
@@ -784,6 +785,24 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
    	}
    }
+static struct drm_gem_object*
+amdgpu_amdkfd_import(struct drm_device *dev, struct dma_buf *dma_buf)
+{
+	struct drm_gem_object *gobj;
+	struct amdgpu_bo *abo;
+
+	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
I'd rather remove this limitation. We should be able to use any DMABuf with
KFD. This check was added when we basically sidestepped all the dmabuf
attachment code and just extracted the amdgpu_bo ourselves. I don't think we
want to keep doing that.
This limitation here is to just reference the gobj if it is an amdgpu bo
and from same device instead of creating a gobj when importing a dmabuf.

Please see my patch "[PATCH v2 1/2] drm/amdgpu: Generalize KFD dmabuf
import" sent to amd-gfx on March 16. I'm about to send an updated version of
this as part of upstream RDMA support soon.
The "drm/amdgpu: Generalize KFD dmabuf import" doesn't handle the struct kgd_mem
memory leak issue. Looking forward to the updated version. Thanks!
Maybe we're trying to fix different problems. Can you give a more detailed
explanation of the memory leak you're seeing? It's not obvious to me.
The struct kgd_mem is allocted by "*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);",
but it is not assigned to bo->kfd_bo like this "bo->kfd_bo = *mem;". Then *mem will
never be freed.

True. With the current upstream driver there is no way this can happen, 
because we don't have an API for KFD to export a dmabuf in a way that 
could later be imported. But with the RDMA and IPC features we're 
working on, this becomes a real possibility.

Your solution is to ensure that the dmabuf import always creates a new 
amdgpu_bo. But that can add a lot of overhead. How about this idea: In 
amdgpu_amdkfd_gpuvm_free_memory_of_gpu we could add this after 
drm_gem_object_put:

	if (mem->bo->kfd_bo != mem)
		kfree(mem);

That way amdgpu_amdkfd_release_notify would only be responsible for 
freeing the original kgd_mem. Any imports will be freed explicitly.

Regards,
  Felix



Regards,
Lang

The problem I'm trying to solve is, to ensure that each exported BO only has
a single dmabuf because we run into problems with GEM if we have multiple
dmabufs pointing to the same GEM object. That also enables re-exporting
dmabufs of imported BOs. At the same time I'm removing any limitations of
the types of BOs we can import, and trying to eliminate any custom dmabuf
handling in KFD.

Regards,
   Felix


Regards,
Lang

Regards,
    Felix


+		gobj = dma_buf->priv;
+		abo = gem_to_amdgpu_bo(gobj);
+		if (gobj->dev == dev && abo->kfd_bo) {
+			drm_gem_object_get(gobj);
+			return gobj;
+		}
+	}
+
+	return amdgpu_dma_buf_create_obj_and_attach(dev, dma_buf);
+}
+
    static int
    kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
    		      struct amdgpu_bo **bo)
@@ -802,7 +821,7 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
    		}
    	}
-	gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
+	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), mem->dmabuf);
    	if (IS_ERR(gobj))
    		return PTR_ERR(gobj);
@@ -1805,12 +1824,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
    {
    	struct amdkfd_process_info *process_info = mem->process_info;
    	unsigned long bo_size = mem->bo->tbo.base.size;
+	bool is_imported = false;
+	struct drm_gem_object *imported_gobj;
    	struct kfd_mem_attachment *entry, *tmp;
    	struct bo_vm_reservation_context ctx;
    	struct ttm_validate_buffer *bo_list_entry;
    	unsigned int mapped_to_gpu_memory;
    	int ret;
-	bool is_imported = false;
    	mutex_lock(&mem->lock);
@@ -1885,7 +1905,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
    	}
    	/* Free the BO*/
-	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
+	if (!is_imported) {
+		drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
+	} else {
+		imported_gobj = mem->dmabuf->priv;
+		drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
+	}
+
    	if (mem->dmabuf)
    		dma_buf_put(mem->dmabuf);
    	mutex_destroy(&mem->lock);
@@ -2249,62 +2275,77 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
    				      uint64_t *mmap_offset)
    {
    	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
-	struct drm_gem_object *obj;
-	struct amdgpu_bo *bo;
+	struct drm_gem_object *imported_gobj, *gobj;
+	struct amdgpu_bo *imported_bo, *bo;
    	int ret;
-	if (dma_buf->ops != &amdgpu_dmabuf_ops)
-		/* Can't handle non-graphics buffers */
+	/*
+	 * Can't handle non-graphics buffers and
+	 * buffers from other devices
+	 */
+	imported_gobj = dma_buf->priv;
+	if (dma_buf->ops != &amdgpu_dmabuf_ops ||
+	    drm_to_adev(imported_gobj->dev) != adev)
    		return -EINVAL;
-	obj = dma_buf->priv;
-	if (drm_to_adev(obj->dev) != adev)
-		/* Can't handle buffers from other devices */
+	/* Only VRAM and GTT BOs are supported */
+	imported_bo = gem_to_amdgpu_bo(imported_gobj);
+	if (!(imported_bo->preferred_domains &
+	    (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)))
    		return -EINVAL;
-	bo = gem_to_amdgpu_bo(obj);
-	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
-				    AMDGPU_GEM_DOMAIN_GTT)))
-		/* Only VRAM and GTT BOs are supported */
-		return -EINVAL;
+	ret = drm_vma_node_allow(&imported_gobj->vma_node, drm_priv);
+	if (ret)
+		return ret;
-	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-	if (!*mem)
-		return -ENOMEM;
+	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), dma_buf);
+	if (IS_ERR(gobj)) {
+		ret = PTR_ERR(gobj);
+		goto err_import;
+	}
-	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
-	if (ret) {
-		kfree(mem);
-		return ret;
+	bo = gem_to_amdgpu_bo(gobj);
+	bo->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
+
+	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+	if (!*mem) {
+		ret = -ENOMEM;
+		goto err_alloc_mem;
    	}
    	if (size)
-		*size = amdgpu_bo_size(bo);
+		*size = amdgpu_bo_size(imported_bo);
    	if (mmap_offset)
-		*mmap_offset = amdgpu_bo_mmap_offset(bo);
+		*mmap_offset = amdgpu_bo_mmap_offset(imported_bo);
    	INIT_LIST_HEAD(&(*mem)->attachments);
    	mutex_init(&(*mem)->lock);
    	(*mem)->alloc_flags =
-		((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+		((imported_bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
    		KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
    		| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
    		| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
-	drm_gem_object_get(&bo->tbo.base);
+	get_dma_buf(dma_buf);
+	(*mem)->dmabuf = dma_buf;
    	(*mem)->bo = bo;
    	(*mem)->va = va;
-	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
-		AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+	(*mem)->domain = AMDGPU_GEM_DOMAIN_GTT;
    	(*mem)->mapped_to_gpu_memory = 0;
    	(*mem)->process_info = avm->process_info;
    	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
    	amdgpu_sync_create(&(*mem)->sync);
    	(*mem)->is_imported = true;
+	bo->kfd_bo = *mem;
    	return 0;
+err_import:
+	drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
+err_alloc_mem:
+	drm_gem_object_put(gobj);
+	return ret;
    }
    /* Evict a userptr BO by stopping the queues if necessary