Re: [PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 07/25/ , Felix Kuehling wrote:
> 
> Am 2022-07-25 um 06:32 schrieb Lang Yu:
> > We have memory leak issue in current implenmention, i.e.,
> > the allocated struct kgd_mem memory is not handled properly.
> > 
> > The idea is always creating a buffer object when importing
> > a gfx BO based dmabuf.
> > 
> > Signed-off-by: Lang Yu <Lang.Yu@xxxxxxx>
> > ---
> >   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 99 +++++++++++++------
> >   1 file changed, 70 insertions(+), 29 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > index b3806ebe5ef7..c1855b72a3f0 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > @@ -225,7 +225,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
> >   	u32 alloc_flags = bo->kfd_bo->alloc_flags;
> >   	u64 size = amdgpu_bo_size(bo);
> > -	unreserve_mem_limit(adev, size, alloc_flags);
> > +	if (!bo->kfd_bo->is_imported)
> > +		unreserve_mem_limit(adev, size, alloc_flags);
> >   	kfree(bo->kfd_bo);
> >   }
> > @@ -784,6 +785,24 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
> >   	}
> >   }
> > +static struct drm_gem_object*
> > +amdgpu_amdkfd_import(struct drm_device *dev, struct dma_buf *dma_buf)
> > +{
> > +	struct drm_gem_object *gobj;
> > +	struct amdgpu_bo *abo;
> > +
> > +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> 
> I'd rather remove this limitation. We should be able to use any DMABuf with
> KFD. This check was added when we basically sidestepped all the dmabuf
> attachment code and just extracted the amdgpu_bo ourselves. I don't think we
> want to keep doing that.

This limitation here is to just reference the gobj if it is an amdgpu bo
and from same device instead of creating a gobj when importing a dmabuf.

> Please see my patch "[PATCH v2 1/2] drm/amdgpu: Generalize KFD dmabuf
> import" sent to amd-gfx on March 16. I'm about to send an updated version of
> this as part of upstream RDMA support soon.

The "drm/amdgpu: Generalize KFD dmabuf import" doesn't handle the struct kgd_mem
memory leak issue. Looking forward to the updated version. Thanks!

Regards,
Lang

> Regards,
>   Felix
> 
> 
> > +		gobj = dma_buf->priv;
> > +		abo = gem_to_amdgpu_bo(gobj);
> > +		if (gobj->dev == dev && abo->kfd_bo) {
> > +			drm_gem_object_get(gobj);
> > +			return gobj;
> > +		}
> > +	}
> > +
> > +	return amdgpu_dma_buf_create_obj_and_attach(dev, dma_buf);
> > +}
> > +
> >   static int
> >   kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
> >   		      struct amdgpu_bo **bo)
> > @@ -802,7 +821,7 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
> >   		}
> >   	}
> > -	gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
> > +	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), mem->dmabuf);
> >   	if (IS_ERR(gobj))
> >   		return PTR_ERR(gobj);
> > @@ -1805,12 +1824,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> >   {
> >   	struct amdkfd_process_info *process_info = mem->process_info;
> >   	unsigned long bo_size = mem->bo->tbo.base.size;
> > +	bool is_imported = false;
> > +	struct drm_gem_object *imported_gobj;
> >   	struct kfd_mem_attachment *entry, *tmp;
> >   	struct bo_vm_reservation_context ctx;
> >   	struct ttm_validate_buffer *bo_list_entry;
> >   	unsigned int mapped_to_gpu_memory;
> >   	int ret;
> > -	bool is_imported = false;
> >   	mutex_lock(&mem->lock);
> > @@ -1885,7 +1905,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> >   	}
> >   	/* Free the BO*/
> > -	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
> > +	if (!is_imported) {
> > +		drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
> > +	} else {
> > +		imported_gobj = mem->dmabuf->priv;
> > +		drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
> > +	}
> > +
> >   	if (mem->dmabuf)
> >   		dma_buf_put(mem->dmabuf);
> >   	mutex_destroy(&mem->lock);
> > @@ -2249,62 +2275,77 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
> >   				      uint64_t *mmap_offset)
> >   {
> >   	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
> > -	struct drm_gem_object *obj;
> > -	struct amdgpu_bo *bo;
> > +	struct drm_gem_object *imported_gobj, *gobj;
> > +	struct amdgpu_bo *imported_bo, *bo;
> >   	int ret;
> > -	if (dma_buf->ops != &amdgpu_dmabuf_ops)
> > -		/* Can't handle non-graphics buffers */
> > +	/*
> > +	 * Can't handle non-graphics buffers and
> > +	 * buffers from other devices
> > +	 */
> > +	imported_gobj = dma_buf->priv;
> > +	if (dma_buf->ops != &amdgpu_dmabuf_ops ||
> > +	    drm_to_adev(imported_gobj->dev) != adev)
> >   		return -EINVAL;
> > -	obj = dma_buf->priv;
> > -	if (drm_to_adev(obj->dev) != adev)
> > -		/* Can't handle buffers from other devices */
> > +	/* Only VRAM and GTT BOs are supported */
> > +	imported_bo = gem_to_amdgpu_bo(imported_gobj);
> > +	if (!(imported_bo->preferred_domains &
> > +	    (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)))
> >   		return -EINVAL;
> > -	bo = gem_to_amdgpu_bo(obj);
> > -	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
> > -				    AMDGPU_GEM_DOMAIN_GTT)))
> > -		/* Only VRAM and GTT BOs are supported */
> > -		return -EINVAL;
> > +	ret = drm_vma_node_allow(&imported_gobj->vma_node, drm_priv);
> > +	if (ret)
> > +		return ret;
> > -	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
> > -	if (!*mem)
> > -		return -ENOMEM;
> > +	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), dma_buf);
> > +	if (IS_ERR(gobj)) {
> > +		ret = PTR_ERR(gobj);
> > +		goto err_import;
> > +	}
> > -	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
> > -	if (ret) {
> > -		kfree(mem);
> > -		return ret;
> > +	bo = gem_to_amdgpu_bo(gobj);
> > +	bo->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
> > +
> > +	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
> > +	if (!*mem) {
> > +		ret = -ENOMEM;
> > +		goto err_alloc_mem;
> >   	}
> >   	if (size)
> > -		*size = amdgpu_bo_size(bo);
> > +		*size = amdgpu_bo_size(imported_bo);
> >   	if (mmap_offset)
> > -		*mmap_offset = amdgpu_bo_mmap_offset(bo);
> > +		*mmap_offset = amdgpu_bo_mmap_offset(imported_bo);
> >   	INIT_LIST_HEAD(&(*mem)->attachments);
> >   	mutex_init(&(*mem)->lock);
> >   	(*mem)->alloc_flags =
> > -		((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
> > +		((imported_bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
> >   		KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
> >   		| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
> >   		| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
> > -	drm_gem_object_get(&bo->tbo.base);
> > +	get_dma_buf(dma_buf);
> > +	(*mem)->dmabuf = dma_buf;
> >   	(*mem)->bo = bo;
> >   	(*mem)->va = va;
> > -	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
> > -		AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
> > +	(*mem)->domain = AMDGPU_GEM_DOMAIN_GTT;
> >   	(*mem)->mapped_to_gpu_memory = 0;
> >   	(*mem)->process_info = avm->process_info;
> >   	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
> >   	amdgpu_sync_create(&(*mem)->sync);
> >   	(*mem)->is_imported = true;
> > +	bo->kfd_bo = *mem;
> >   	return 0;
> > +err_import:
> > +	drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
> > +err_alloc_mem:
> > +	drm_gem_object_put(gobj);
> > +	return ret;
> >   }
> >   /* Evict a userptr BO by stopping the queues if necessary



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux