This is a note to let you know that I've just added the patch titled drm/amdkfd: change system memory overcommit limit to the 4.19-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: drm-amdkfd-change-system-memory-overcommit-limit.patch and it can be found in the queue-4.19 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. commit 139e701f1dcbd438ad32f6b0120a2e0951d75898 Author: Eric Huang <JinhuiEric.Huang@xxxxxxx> Date: Wed Sep 5 11:46:14 2018 -0400 drm/amdkfd: change system memory overcommit limit [ Upstream commit 5d240da93edc29adb68320c5e475dc9c7fcad5dd ] It is to improve system limit by: 1. replacing userptrlimit with a total memory limit that conunts TTM memory usage and userptr usage. 2. counting acc size for all BOs. Signed-off-by: Eric Huang <JinHuiEric.Huang@xxxxxxx> Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> Stable-dep-of: 25e9227c6afd ("drm/amdgpu: Fix leak when GPU memory allocation fails") Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 4488aad64643b..13a03f467688a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -46,9 +46,9 @@ /* Impose limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; - uint64_t max_userptr_mem_limit; + uint64_t max_ttm_mem_limit; int64_t system_mem_used; - int64_t userptr_mem_used; + int64_t ttm_mem_used; spinlock_t mem_limit_lock; } kfd_mem_limit; @@ -90,8 +90,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, } /* Set memory usage limits. Current, limits are - * System (kernel) memory - 3/8th System RAM - * Userptr memory - 3/4th System RAM + * System (TTM + userptr) memory - 3/4th System RAM + * TTM memory - 3/8th System RAM */ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) { @@ -103,48 +103,54 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) mem *= si.mem_unit; spin_lock_init(&kfd_mem_limit.mem_limit_lock); - kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); - kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2); - pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n", + kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2); + kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3); + pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n", (kfd_mem_limit.max_system_mem_limit >> 20), - (kfd_mem_limit.max_userptr_mem_limit >> 20)); + (kfd_mem_limit.max_ttm_mem_limit >> 20)); } static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 domain) + uint64_t size, u32 domain, bool sg) { - size_t acc_size; + size_t acc_size, system_mem_needed, ttm_mem_needed; int ret = 0; acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, sizeof(struct amdgpu_bo)); spin_lock(&kfd_mem_limit.mem_limit_lock); + if (domain == AMDGPU_GEM_DOMAIN_GTT) { - if (kfd_mem_limit.system_mem_used + (acc_size + size) > - kfd_mem_limit.max_system_mem_limit) { - ret = -ENOMEM; - goto err_no_mem; - } - kfd_mem_limit.system_mem_used += (acc_size + size); - } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { - if ((kfd_mem_limit.system_mem_used + acc_size > - kfd_mem_limit.max_system_mem_limit) || - (kfd_mem_limit.userptr_mem_used + (size + acc_size) > - kfd_mem_limit.max_userptr_mem_limit)) { - ret = -ENOMEM; - goto err_no_mem; - } - kfd_mem_limit.system_mem_used += acc_size; - kfd_mem_limit.userptr_mem_used += size; + /* TTM GTT memory */ + system_mem_needed = acc_size + size; + ttm_mem_needed = acc_size + size; + } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) { + /* Userptr */ + system_mem_needed = acc_size + size; + ttm_mem_needed = acc_size; + } else { + /* VRAM and SG */ + system_mem_needed = acc_size; + ttm_mem_needed = acc_size; + } + + if ((kfd_mem_limit.system_mem_used + system_mem_needed > + kfd_mem_limit.max_system_mem_limit) || + (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > + kfd_mem_limit.max_ttm_mem_limit)) + ret = -ENOMEM; + else { + kfd_mem_limit.system_mem_used += system_mem_needed; + kfd_mem_limit.ttm_mem_used += ttm_mem_needed; } -err_no_mem: + spin_unlock(&kfd_mem_limit.mem_limit_lock); return ret; } static void unreserve_system_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 domain) + uint64_t size, u32 domain, bool sg) { size_t acc_size; @@ -154,14 +160,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, spin_lock(&kfd_mem_limit.mem_limit_lock); if (domain == AMDGPU_GEM_DOMAIN_GTT) { kfd_mem_limit.system_mem_used -= (acc_size + size); - } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { + kfd_mem_limit.ttm_mem_used -= (acc_size + size); + } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) { + kfd_mem_limit.system_mem_used -= (acc_size + size); + kfd_mem_limit.ttm_mem_used -= acc_size; + } else { kfd_mem_limit.system_mem_used -= acc_size; - kfd_mem_limit.userptr_mem_used -= size; + kfd_mem_limit.ttm_mem_used -= acc_size; } WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "kfd system memory accounting unbalanced"); - WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, - "kfd userptr memory accounting unbalanced"); + WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, + "kfd TTM memory accounting unbalanced"); spin_unlock(&kfd_mem_limit.mem_limit_lock); } @@ -171,16 +181,22 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) spin_lock(&kfd_mem_limit.mem_limit_lock); if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { - kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; - kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo); + kfd_mem_limit.system_mem_used -= + (bo->tbo.acc_size + amdgpu_bo_size(bo)); + kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size; } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { kfd_mem_limit.system_mem_used -= (bo->tbo.acc_size + amdgpu_bo_size(bo)); + kfd_mem_limit.ttm_mem_used -= + (bo->tbo.acc_size + amdgpu_bo_size(bo)); + } else { + kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; + kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size; } WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "kfd system memory accounting unbalanced"); - WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, - "kfd userptr memory accounting unbalanced"); + WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, + "kfd TTM memory accounting unbalanced"); spin_unlock(&kfd_mem_limit.mem_limit_lock); } @@ -1201,10 +1217,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( amdgpu_sync_create(&(*mem)->sync); - ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain); + ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, + alloc_domain, false); if (ret) { pr_debug("Insufficient system memory\n"); - goto err_reserve_system_mem; + goto err_reserve_limit; } pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", @@ -1252,10 +1269,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( allocate_init_user_pages_failed: amdgpu_bo_unref(&bo); /* Don't unreserve system mem limit twice */ - goto err_reserve_system_mem; + goto err_reserve_limit; err_bo_create: - unreserve_system_mem_limit(adev, size, alloc_domain); -err_reserve_system_mem: + unreserve_system_mem_limit(adev, size, alloc_domain, false); +err_reserve_limit: mutex_destroy(&(*mem)->lock); kfree(*mem); return ret;