[WHY]
Unified memory with xnack off should be tracked, as userptr mappings
and legacy allocations do. To avoid oversuscribe system memory when
xnack off.
[How]
Exposing functions reserve_mem_limit and unreserve_mem_limit to SVM
API and call them on every prange creation and free.
Signed-off-by: Alex Sierra<alex.sierra@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 +++
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 24 +++++++------
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 34
++++++++++++++-----
3 files changed, 43 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index f8b9f27adcf5..f55f34af6480 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -301,6 +301,10 @@ bool amdgpu_amdkfd_bo_mapped_to_dev(struct
amdgpu_device *adev, struct kgd_mem *
void amdgpu_amdkfd_block_mmu_notifications(void *p);
int amdgpu_amdkfd_criu_resume(void *p);
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct
amdgpu_device *adev);
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag);
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag);
#if IS_ENABLED(CONFIG_HSA_AMD)
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index e985cf9c7ec0..b2236159ff39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -122,7 +122,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t
size)
*
* Return: returns -ENOMEM in case of error, ZERO otherwise
*/
-static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device
*adev,
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag)
{
uint64_t reserved_for_pt =
@@ -157,8 +157,8 @@ static int
amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
kfd_mem_limit.max_system_mem_limit &&
!no_system_mem_limit) ||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
kfd_mem_limit.max_ttm_mem_limit) ||
- (adev->kfd.vram_used + vram_needed >
- adev->gmc.real_vram_size - reserved_for_pt)) {
+ (adev && (adev->kfd.vram_used + vram_needed >
+ adev->gmc.real_vram_size - reserved_for_pt))) {
ret = -ENOMEM;
goto release;
}
@@ -166,7 +166,8 @@ static int
amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
/* Update memory accounting by decreasing available system
* memory, TTM memory and GPU memory as computed above
*/
- adev->kfd.vram_used += vram_needed;
+ if (adev)
+ adev->kfd.vram_used += vram_needed;
kfd_mem_limit.system_mem_used += system_mem_needed;
kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
@@ -175,7 +176,7 @@ static int
amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
return ret;
}
-static void unreserve_mem_limit(struct amdgpu_device *adev,
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag)
{
spin_lock(&kfd_mem_limit.mem_limit_lock);
@@ -184,7 +185,8 @@ static void unreserve_mem_limit(struct
amdgpu_device *adev,
kfd_mem_limit.system_mem_used -= size;
kfd_mem_limit.ttm_mem_used -= size;
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
- adev->kfd.vram_used -= size;
+ if (adev)
+ adev->kfd.vram_used -= size;
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
kfd_mem_limit.system_mem_used -= size;
} else if (!(alloc_flag &
@@ -193,9 +195,9 @@ static void unreserve_mem_limit(struct
amdgpu_device *adev,
pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
goto release;
}
-
- WARN_ONCE(adev->kfd.vram_used < 0,
- "KFD VRAM memory accounting unbalanced");
+ if (adev)
+ WARN_ONCE(adev->kfd.vram_used < 0,
+ "KFD VRAM memory accounting unbalanced");
WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
"KFD TTM memory accounting unbalanced");
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
@@ -211,7 +213,7 @@ void amdgpu_amdkfd_release_notify(struct
amdgpu_bo *bo)
u32 alloc_flags = bo->kfd_bo->alloc_flags;
u64 size = amdgpu_bo_size(bo);
- unreserve_mem_limit(adev, size, alloc_flags);
+ amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags);
kfree(bo->kfd_bo);
}
@@ -1601,7 +1603,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
err_bo_create:
- unreserve_mem_limit(adev, size, flags);
+ amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
err_reserve_limit:
mutex_destroy(&(*mem)->lock);
if (gobj)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 5ed8d9b549a4..e7e9b388fea4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -261,11 +261,21 @@ void svm_range_free_dma_mappings(struct
svm_range *prange)
static void svm_range_free(struct svm_range *prange)
{
+ uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
+ struct kfd_process *p;
+
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n",
prange->svms, prange,
prange->start, prange->last);
svm_range_vram_node_free(prange);
svm_range_free_dma_mappings(prange);
+
+ p = container_of(prange->svms, struct kfd_process, svms);
+ if (!p->xnack_enabled) {
+ pr_debug("unreserve mem limit: %lld\n", size);
+ amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ }
mutex_destroy(&prange->lock);
mutex_destroy(&prange->migrate_mutex);
kfree(prange);
@@ -284,7 +294,7 @@ svm_range_set_default_attributes(int32_t
*location, int32_t *prefetch_loc,
static struct
svm_range *svm_range_new(struct svm_range_list *svms, uint64_t
start,
- uint64_t last)
+ uint64_t last, bool is_new_alloc)
{
uint64_t size = last - start + 1;
struct svm_range *prange;
@@ -293,6 +303,15 @@ svm_range *svm_range_new(struct svm_range_list
*svms, uint64_t start,
prange = kzalloc(sizeof(*prange), GFP_KERNEL);
if (!prange)
return NULL;
+
+ p = container_of(svms, struct kfd_process, svms);
+ if (!p->xnack_enabled && is_new_alloc &&
+ amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) {