[Public] >-----Original Message----- >From: Kuehling, Felix <Felix.Kuehling@xxxxxxx> >Sent: Saturday, April 27, 2024 6:52 AM >To: Yu, Lang <Lang.Yu@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx >Cc: Yang, Philip <Philip.Yang@xxxxxxx>; Koenig, Christian ><Christian.Koenig@xxxxxxx>; Zhang, Yifan <Yifan1.Zhang@xxxxxxx>; Liu, >Aaron <Aaron.Liu@xxxxxxx> >Subject: Re: [PATCH 1/2] drm/amdkfd: Let VRAM allocations go to GTT >domain on small APUs > > >On 2024-04-26 04:37, Lang Yu wrote: >> Small APUs(i.e., consumer, embedded products) usually have a small >> carveout device memory which can't satisfy most compute workloads >> memory allocation requirements. >> >> We can't even run a Basic MNIST Example with a default 512MB carveout. >> https://github.com/pytorch/examples/tree/main/mnist. >> >> Though we can change BIOS settings to enlarge carveout size, which is >> inflexible and may bring complaint. On the other hand, the memory >> resource can't be effectively used between host and device. >> >> The solution is MI300A approach, i.e., let VRAM allocations go to GTT. >> >> Signed-off-by: Lang Yu <Lang.Yu@xxxxxxx> > >Two nit-picks inline. Other than that, this patch looks reasonable to me. Thanks. Will update them accordingly. Regards, Lang > >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 6 +++++- >> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 21 +++++++++++- >------- >> drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 2 +- >> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 6 ++++-- >> drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 3 ++- >> 5 files changed, 24 insertions(+), 14 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >> index 7ba05f030dd1..3295838e9a1d 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c >> @@ -456,7 +456,9 @@ void amdgpu_amdkfd_get_local_mem_info(struct >amdgpu_device *adev, >> mem_info->local_mem_size_private = >> KFD_XCP_MEMORY_SIZE(adev, xcp- >>id); >> } else { >> - mem_info->local_mem_size_public = adev- >>gmc.visible_vram_size; >> + mem_info->local_mem_size_public = adev->flags & >AMD_IS_APU ? >> + (ttm_tt_pages_limit() << >PAGE_SHIFT) : >> + adev- >>gmc.visible_vram_size; >> mem_info->local_mem_size_private = adev- >>gmc.real_vram_size - >> adev->gmc.visible_vram_size; > >On an APU the private size should be reported as 0. > > >> } >> @@ -824,6 +826,8 @@ u64 amdgpu_amdkfd_xcp_memory_size(struct >amdgpu_device *adev, int xcp_id) >> } >> do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition); >> return ALIGN_DOWN(tmp, PAGE_SIZE); >> + } else if (adev->flags & AMD_IS_APU) { >> + return (ttm_tt_pages_limit() << PAGE_SHIFT); >> } else { >> return adev->gmc.real_vram_size; >> } >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> index c4f9960dafbb..7eb5afcc4895 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> @@ -196,7 +196,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct >amdgpu_device *adev, >> return -EINVAL; >> >> vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id); >> - if (adev->gmc.is_app_apu) { >> + if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) { >> system_mem_needed = size; >> ttm_mem_needed = size; >> } >> @@ -232,7 +232,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct >amdgpu_device *adev, >> "adev reference can't be null when vram is used"); >> if (adev && xcp_id >= 0) { >> adev->kfd.vram_used[xcp_id] += vram_needed; >> - adev->kfd.vram_used_aligned[xcp_id] += adev- >>gmc.is_app_apu ? >> + adev->kfd.vram_used_aligned[xcp_id] += >> + (adev->gmc.is_app_apu || adev->flags & >AMD_IS_APU) ? >> vram_needed : >> ALIGN(vram_needed, >VRAM_AVAILABLITY_ALIGN); >> } >> @@ -260,7 +261,7 @@ void >amdgpu_amdkfd_unreserve_mem_limit(struct >> amdgpu_device *adev, >> >> if (adev) { >> adev->kfd.vram_used[xcp_id] -= size; >> - if (adev->gmc.is_app_apu) { >> + if (adev->gmc.is_app_apu || adev->flags & >AMD_IS_APU) { >> adev->kfd.vram_used_aligned[xcp_id] -= size; >> kfd_mem_limit.system_mem_used -= size; >> kfd_mem_limit.ttm_mem_used -= size; @@ - >889,7 +890,7 @@ static >> int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, >> * if peer device has large BAR. In contrast, access over xGMI is >> * allowed for both small and large BAR configurations of peer device >> */ >> - if ((adev != bo_adev && !adev->gmc.is_app_apu) && >> + if ((adev != bo_adev && !(adev->gmc.is_app_apu || adev->flags & >> +AMD_IS_APU)) && >> ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) || >> (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) || >> (mem->alloc_flags & >KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { @@ >> -1657,7 +1658,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct >amdgpu_device *adev, >> - atomic64_read(&adev->vram_pin_size) >> - reserved_for_pt; >> >> - if (adev->gmc.is_app_apu) { >> + if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) { >> system_mem_available = no_system_mem_limit ? >> > kfd_mem_limit.max_system_mem_limit : >> > kfd_mem_limit.max_system_mem_limit - @@ -1669,6 +1670,7 @@ >> size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device >*adev, >> available = min3(system_mem_available, ttm_mem_available, >> vram_available); >> available = ALIGN_DOWN(available, PAGE_SIZE); >> + > >Unnecessary whitespace change. > >Regards, > Felix > > >> } else { >> available = ALIGN_DOWN(vram_available, >VRAM_AVAILABLITY_ALIGN); >> } >> @@ -1705,7 +1707,7 @@ int >amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( >> if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { >> domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; >> >> - if (adev->gmc.is_app_apu) { >> + if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) { >> domain = AMDGPU_GEM_DOMAIN_GTT; >> alloc_domain = AMDGPU_GEM_DOMAIN_GTT; >> alloc_flags = 0; >> @@ -1952,7 +1954,7 @@ int >amdgpu_amdkfd_gpuvm_free_memory_of_gpu( >> if (size) { >> if (!is_imported && >> (mem->bo->preferred_domains == >AMDGPU_GEM_DOMAIN_VRAM || >> - (adev->gmc.is_app_apu && >> + ((adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) && >> mem->bo->preferred_domains == >AMDGPU_GEM_DOMAIN_GTT))) >> *size = bo_size; >> else >> @@ -2374,8 +2376,9 @@ static int import_obj_create(struct >amdgpu_device *adev, >> (*mem)->dmabuf = dma_buf; >> (*mem)->bo = bo; >> (*mem)->va = va; >> - (*mem)->domain = (bo->preferred_domains & >AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ? >> - AMDGPU_GEM_DOMAIN_VRAM : >AMDGPU_GEM_DOMAIN_GTT; >> + (*mem)->domain = (bo->preferred_domains & >AMDGPU_GEM_DOMAIN_VRAM) && >> + !(adev->gmc.is_app_apu || adev->flags & >AMD_IS_APU) ? >> + AMDGPU_GEM_DOMAIN_VRAM : >AMDGPU_GEM_DOMAIN_GTT; >> >> (*mem)->mapped_to_gpu_memory = 0; >> (*mem)->process_info = avm->process_info; diff --git >> a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c >> index 4bcfbeac48fb..4816fcb9803a 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c >> @@ -1023,7 +1023,7 @@ int kgd2kfd_init_zone_device(struct >amdgpu_device *adev) >> if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 1)) >> return -EINVAL; >> >> - if (adev->gmc.is_app_apu) >> + if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) >> return 0; >> >> pgmap = &kfddev->pgmap; >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >> index bfab16b43fec..238ac11bb97d 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >> @@ -2619,7 +2619,8 @@ svm_range_best_restore_location(struct >svm_range *prange, >> return -1; >> } >> >> - if (node->adev->gmc.is_app_apu) >> + if (node->adev->gmc.is_app_apu || >> + node->adev->flags & AMD_IS_APU) >> return 0; >> >> if (prange->preferred_loc == gpuid || @@ -3337,7 +3338,8 @@ >> svm_range_best_prefetch_location(struct svm_range *prange) >> goto out; >> } >> >> - if (bo_node->adev->gmc.is_app_apu) { >> + if (bo_node->adev->gmc.is_app_apu || >> + bo_node->adev->flags & AMD_IS_APU) { >> best_loc = 0; >> goto out; >> } >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h >> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h >> index 026863a0abcd..9c37bd0567ef 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h >> @@ -201,7 +201,8 @@ void svm_range_list_lock_and_flush_work(struct >svm_range_list *svms, struct mm_s >> * is initialized to not 0 when page migration register device memory. >> */ >> #define KFD_IS_SVM_API_SUPPORTED(adev) ((adev)->kfd.pgmap.type != 0 >||\ >> - (adev)->gmc.is_app_apu) >> + (adev)->gmc.is_app_apu ||\ >> + ((adev)->flags & AMD_IS_APU)) >> >> void svm_range_bo_unref_async(struct svm_range_bo *svm_bo); >>