Rework the KFD max system memory and TTM limits to allow bigger system
memory allocations, up to 63/64 of the available memory, which is
controlled by the TTM module parameters pages_limit and page_pool_size.
Also, for NPS1 mode, report the max TTM limit as the available VRAM
size.

For the max system memory limit, leave 1 GB exclusively outside ROCm
allocations, i.e. on a 16 GB system more than 14 GB can be used by
ROCm while still leaving some memory for other system applications,
and on a 128 GB system (e.g. GFXIP 9.4.3 APU in NPS1 mode) more than
120 GB can be used by ROCm.

Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@xxxxxxx>
---
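Note for reviewers (not part of the commit): below is a minimal
userspace sketch of the new limit arithmetic, to sanity-check the
numbers quoted above. The helper names are made up, mem_groups = 4 and
"TTM limit = half of system memory" are assumptions (the latter is the
default pages_limit behaviour mentioned in the patch); only the
formulas mirror amdgpu_amdkfd_gpuvm_init_mem_limits() and the NPS1
path in gmc_v9_0_init_acpi_mem_ranges().

  #include <stdio.h>
  #include <stdint.h>

  #define ONE_GB (1ULL << 30)

  /* 63/64 of system memory minus 1 GiB of headroom, as in
   * amdgpu_amdkfd_gpuvm_init_mem_limits(). */
  static uint64_t kfd_system_mem_limit(uint64_t mem)
  {
      return mem - (mem >> 6) - ONE_GB;
  }

  /* NPS1 VRAM size selection, as in gmc_v9_0_init_acpi_mem_ranges():
   * use the TTM pages limit when it is non-zero and fits within the
   * NUMA range, otherwise fall back to 3/4th of the NUMA memory. */
  static uint64_t nps1_vram_size(uint64_t numa_size, uint64_t max_ttm_size,
                                 unsigned int mem_groups)
  {
      if (max_ttm_size > numa_size || max_ttm_size == 0)
          return numa_size * (mem_groups - 1) / mem_groups;
      return max_ttm_size;
  }

  int main(void)
  {
      uint64_t sizes[] = { 16 * ONE_GB, 128 * ONE_GB };

      for (int i = 0; i < 2; i++) {
          uint64_t mem = sizes[i];
          /* assume the default TTM pages_limit of half of system memory */
          uint64_t ttm = mem / 2;

          printf("%3llu GiB system: ROCm system mem limit %.2f GiB, "
                 "NPS1 VRAM reported %.2f GiB\n",
                 (unsigned long long)(mem >> 30),
                 (double)kfd_system_mem_limit(mem) / ONE_GB,
                 (double)nps1_vram_size(mem, ttm, 4) / ONE_GB);
      }
      return 0;
  }

Built with a plain C compiler this prints a ~14.75 GiB limit for the
16 GiB case and ~125 GiB for the 128 GiB case, consistent with the
">14 GB" and ">120 GB" figures above.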
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  5 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c         | 25 +++++++++++++------
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 9e18fe5eb190..3387dcdf1bc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -44,6 +44,7 @@
  * changes to accumulate
  */
 #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+#define ONE_GB (1UL << 30)
 
 /*
  * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
@@ -117,11 +118,11 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 		return;
 
 	si_meminfo(&si);
-	mem = si.freeram - si.freehigh;
+	mem = si.totalram - si.totalhigh;
 	mem *= si.mem_unit;
 
 	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
-	kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
+	kfd_mem_limit.max_system_mem_limit = mem - (mem >> 6) - (ONE_GB);
 	kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT;
 	pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
 		(kfd_mem_limit.max_system_mem_limit >> 20),
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 8447fcada8bb..4962e35df617 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -25,6 +25,7 @@
 #include <linux/pci.h>
 
 #include <drm/drm_cache.h>
+#include <drm/ttm/ttm_tt.h>
 
 #include "amdgpu.h"
 #include "gmc_v9_0.h"
@@ -1877,6 +1878,7 @@ static void
 gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev,
 			      struct amdgpu_mem_partition_info *mem_ranges)
 {
+	uint64_t max_ttm_size = ttm_tt_pages_limit() << PAGE_SHIFT;
 	int num_ranges = 0, ret, mem_groups;
 	struct amdgpu_numa_info numa_info;
 	int node_ids[MAX_MEM_RANGES];
@@ -1913,8 +1915,17 @@ gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev,
 
 	/* If there is only partition, don't use entire size */
 	if (adev->gmc.num_mem_partitions == 1) {
-		mem_ranges[0].size = mem_ranges[0].size * (mem_groups - 1);
-		do_div(mem_ranges[0].size, mem_groups);
+		if (max_ttm_size > mem_ranges[0].size || max_ttm_size <= 0) {
+			/* Report VRAM as 3/4th of available numa memory */
+			mem_ranges[0].size = mem_ranges[0].size * (mem_groups - 1);
+			do_div(mem_ranges[0].size, mem_groups);
+		} else {
+			/* Report VRAM as set by ttm.pages_limit or default ttm
+			 * limit which is 1/2 of system memory
+			 */
+			mem_ranges[0].size = max_ttm_size;
+		}
+		pr_debug("NPS1 mode, setting VRAM size = %llu\n", mem_ranges[0].size);
 	}
 }
 
@@ -2159,6 +2170,11 @@ static int gmc_v9_0_sw_init(void *handle)
 
 	amdgpu_gmc_get_vbios_allocations(adev);
 
+	/* Memory manager */
+	r = amdgpu_bo_init(adev);
+	if (r)
+		return r;
+
 #ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV
 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) {
 		r = gmc_v9_0_init_mem_ranges(adev);
@@ -2167,11 +2183,6 @@ static int gmc_v9_0_sw_init(void *handle)
 	}
 #endif
 
-	/* Memory manager */
-	r = amdgpu_bo_init(adev);
-	if (r)
-		return r;
-
 	r = gmc_v9_0_gart_init(adev);
 	if (r)
 		return r;
-- 
2.17.1