On 2025-02-20 6:59, Emily Deng wrote:
> Call amdgpu_amdkfd_reserve_mem_limit in svm_range_vram_node_new when
> creating a new SVM BO. Call amdgpu_amdkfd_unreserve_mem_limit
> in svm_range_bo_release when the SVM BO is deleted.
>
> Signed-off-by: Emily Deng <Emily.Deng@xxxxxxx>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 13 -------------
> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 20 ++++++++++++++++++++
> 2 files changed, 20 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index 79251f22b702..3bbc69751f7e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -514,15 +514,6 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
> start = start_mgr << PAGE_SHIFT;
> end = (last_mgr + 1) << PAGE_SHIFT;
>
> - r = amdgpu_amdkfd_reserve_mem_limit(node->adev,
> - prange->npages * PAGE_SIZE,
> - KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
> - node->xcp ? node->xcp->id : 0);
> - if (r) {
> - dev_dbg(node->adev->dev, "failed to reserve VRAM, r: %ld\n", r);
> - return -ENOSPC;
> - }
> -
> r = svm_range_vram_node_new(node, prange, true);
> if (r) {
> dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
> @@ -560,10 +551,6 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
> }
>
> out:
> - amdgpu_amdkfd_unreserve_mem_limit(node->adev,
> - prange->npages * PAGE_SIZE,
> - KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
> - node->xcp ? node->xcp->id : 0);
> return r < 0 ? r : 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index db3034b00dac..c861d8c90419 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -430,6 +430,10 @@ static void svm_range_bo_release(struct kref *kref)
> /* We're not in the eviction worker. Signal the fence. */
> dma_fence_signal(&svm_bo->eviction_fence->base);
> dma_fence_put(&svm_bo->eviction_fence->base);
> + amdgpu_amdkfd_unreserve_mem_limit(svm_bo->node->adev,
> + svm_bo->bo->tbo.base.size,
> + KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
> + svm_bo->node->xcp ? svm_bo->node->xcp->id : 0);
> amdgpu_bo_unref(&svm_bo->bo);
> kfree(svm_bo);
> }
> @@ -581,6 +585,18 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
> kfree(svm_bo);
> return -ESRCH;
> }
> +
> + r = amdgpu_amdkfd_reserve_mem_limit(node->adev,
> + prange->npages * PAGE_SIZE,
> + KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
> + node->xcp ? node->xcp->id : 0);
> + if (r) {
> + pr_debug("failed to reserve VRAM, r: %ld\n", r);
> + mmput(mm);
> + kfree(svm_bo);
> + return -ENOSPC;
> + }
> +

I believe the error handling would be simpler if you moved this after the mmput call. Then in case of an error just use goto to a new label after the unreserve_mem_limit call you're adding below.

Other than that the patch looks good to me.

Regards,
  Felix

> svm_bo->node = node;
> svm_bo->eviction_fence =
> amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
> @@ -655,6 +671,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
> reserve_bo_failed:
> amdgpu_bo_unref(&bo);
> create_bo_failed:
> + amdgpu_amdkfd_unreserve_mem_limit(svm_bo->node->adev,
> + prange->npages * PAGE_SIZE,
> + KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
> + node->xcp ? node->xcp->id : 0);
> dma_fence_put(&svm_bo->eviction_fence->base);
> kfree(svm_bo);
> prange->ttm_res = NULL;