Re: [PATCH] drm/amdkfd: Correct the position of reserve and unreserve memory

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2025-02-20 06:59, Emily Deng wrote:
Call amdgpu_amdkfd_reserve_mem_limit in svm_range_vram_node_new when
creating a new SVM BO. Call amdgpu_amdkfd_unreserve_mem_limit
in svm_range_bo_release when the SVM BO is deleted.

Signed-off-by: Emily Deng <Emily.Deng@xxxxxxx>
---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 13 -------------
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 20 ++++++++++++++++++++
  2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 79251f22b702..3bbc69751f7e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -514,15 +514,6 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
  	start = start_mgr << PAGE_SHIFT;
  	end = (last_mgr + 1) << PAGE_SHIFT;
- r = amdgpu_amdkfd_reserve_mem_limit(node->adev,
-					prange->npages * PAGE_SIZE,
-					KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
-					node->xcp ? node->xcp->id : 0);
-	if (r) {
-		dev_dbg(node->adev->dev, "failed to reserve VRAM, r: %ld\n", r);
-		return -ENOSPC;
-	}
-
From the git history, this reservation is there to "make sure there is enough available VRAM and migrating to VRAM doesn't evict other possible non-unified memory BOs". Without it, migration can trigger the OOM killer unexpectedly.

We should keep this for xnack on.

  	r = svm_range_vram_node_new(node, prange, true);
  	if (r) {
  		dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
@@ -560,10 +551,6 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
  	}
out:
-	amdgpu_amdkfd_unreserve_mem_limit(node->adev,
-					prange->npages * PAGE_SIZE,
-					KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
-					node->xcp ? node->xcp->id : 0);
Keep this unreserve call for xnack on as well.
  	return r < 0 ? r : 0;
  }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index db3034b00dac..c861d8c90419 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -430,6 +430,10 @@ static void svm_range_bo_release(struct kref *kref)
  		/* We're not in the eviction worker. Signal the fence. */
  		dma_fence_signal(&svm_bo->eviction_fence->base);
  	dma_fence_put(&svm_bo->eviction_fence->base);
+	amdgpu_amdkfd_unreserve_mem_limit(svm_bo->node->adev,
+		svm_bo->bo->tbo.base.size,
+		KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+		svm_bo->node->xcp ? svm_bo->node->xcp->id : 0);
  	amdgpu_bo_unref(&svm_bo->bo);
  	kfree(svm_bo);
  }
@@ -581,6 +585,18 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
  		kfree(svm_bo);
  		return -ESRCH;
  	}
+
+	r = amdgpu_amdkfd_reserve_mem_limit(node->adev,
+			prange->npages * PAGE_SIZE,
+			KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+			node->xcp ? node->xcp->id : 0);
+	if (r) {
+		pr_debug("failed to reserve VRAM, r: %ld\n", r);
+		mmput(mm);
+		kfree(svm_bo);
+		return -ENOSPC;
+	}
+

The VRAM limit reservation here should be done only for xnack off; with xnack on, VRAM overcommit should work.

Regards,

Philip

  	svm_bo->node = node;
  	svm_bo->eviction_fence =
  		amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
@@ -655,6 +671,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
  reserve_bo_failed:
  	amdgpu_bo_unref(&bo);
  create_bo_failed:
+	amdgpu_amdkfd_unreserve_mem_limit(svm_bo->node->adev,
+		prange->npages * PAGE_SIZE,
+		KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+		node->xcp ? node->xcp->id : 0);
  	dma_fence_put(&svm_bo->eviction_fence->base);
  	kfree(svm_bo);
  	prange->ttm_res = NULL;



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux