From: Mukul Joshi <mukul.joshi@xxxxxxx> We need to track memory usage on a per-partition basis. To do that, store the local memory information in the KFD node instead of the KFD device. v2: squash in fix ("amdkfd: Use mem_id to access mem_partition info") Signed-off-by: Mukul Joshi <mukul.joshi@xxxxxxx> Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 17 +++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 12 +++++++----- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 ++++--- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 7 +++++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 7 ++++--- 7 files changed, 36 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 00edb13d2124..85df73f2c85e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -428,14 +428,23 @@ uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev, } void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, - struct kfd_local_mem_info *mem_info) + struct kfd_local_mem_info *mem_info, + uint8_t xcp_id) { memset(mem_info, 0, sizeof(*mem_info)); - mem_info->local_mem_size_public = adev->gmc.visible_vram_size; - mem_info->local_mem_size_private = adev->gmc.real_vram_size - + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) { + if (adev->gmc.real_vram_size == adev->gmc.visible_vram_size) + mem_info->local_mem_size_public = + KFD_XCP_MEMORY_SIZE(adev, xcp_id); + else + mem_info->local_mem_size_private = + KFD_XCP_MEMORY_SIZE(adev, xcp_id); + } else { + mem_info->local_mem_size_public = adev->gmc.visible_vram_size; + mem_info->local_mem_size_private = adev->gmc.real_vram_size - adev->gmc.visible_vram_size; - + } mem_info->vram_width = adev->gmc.vram_width; 
pr_debug("Address base: %pap public 0x%llx private 0x%llx\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4e6221bccffe..4bf6f5659568 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -231,7 +231,8 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem); uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev, enum kgd_engine_type type); void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, - struct kfd_local_mem_info *mem_info); + struct kfd_local_mem_info *mem_info, + uint8_t xcp_id); uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev); uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev); @@ -334,10 +335,11 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, ((adev)->xcp_mgr && (xcp_id) >= 0 ?\ (adev)->xcp_mgr->xcp[(xcp_id)].mem_id : -1) -#define KFD_XCP_MEMORY_SIZE(n) ((n)->adev->gmc.num_mem_partitions ?\ - (n)->adev->gmc.mem_partitions[(n)->xcp->mem_id].size /\ - (n)->adev->xcp_mgr->num_xcp_per_mem_partition :\ - (n)->adev->gmc.real_vram_size) +#define KFD_XCP_MEMORY_SIZE(adev, xcp_id)\ + ((adev)->gmc.num_mem_partitions && (xcp_id) >= 0 ?\ + (adev)->gmc.mem_partitions[KFD_XCP_MEM_ID((adev), (xcp_id))].size /\ + (adev)->xcp_mgr->num_xcp_per_mem_partition :\ + (adev)->gmc.real_vram_size) #if IS_ENABLED(CONFIG_HSA_AMD) void amdgpu_amdkfd_gpuvm_init_mem_limits(void); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 344b238d6771..089e1d498670 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1023,11 +1023,12 @@ bool kfd_dev_is_large_bar(struct kfd_node *dev) if (dev->kfd->use_iommu_v2) return false; - if (dev->kfd->local_mem_info.local_mem_size_private == 0 && - dev->kfd->local_mem_info.local_mem_size_public > 0) + if 
(dev->local_mem_info.local_mem_size_private == 0 && + dev->local_mem_info.local_mem_size_public > 0) return true; - if (dev->kfd->local_mem_info.local_mem_size_public == 0 && dev->kfd->adev->gmc.is_app_apu) { + if (dev->local_mem_info.local_mem_size_public == 0 && + dev->kfd->adev->gmc.is_app_apu) { pr_debug("APP APU, Consider like a large bar system\n"); return true; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 1aaf933f9f48..950af6820153 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -2191,7 +2191,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * report the total FB size (public+private) as a single * private heap. */ - local_mem_info = kdev->kfd->local_mem_info; + local_mem_info = kdev->local_mem_info; sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + sub_type_hdr->length); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index db5b53fcdf11..d41da964d2f5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -726,7 +726,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, svm_migrate_init(kfd->adev); - amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info); dev_info(kfd_device, "Total number of KFD nodes to be created: %d\n", kfd->num_nodes); @@ -756,7 +755,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, if (node->xcp) { dev_info(kfd_device, "KFD node %d partition %d size %lldM\n", node->node_id, node->xcp->mem_id, - KFD_XCP_MEMORY_SIZE(node) >> 20); + KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20); } if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) && @@ -783,6 +782,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, } node->max_proc_per_quantum = max_proc_per_quantum; atomic_set(&node->sram_ecc_flag, 0); + + amdgpu_amdkfd_get_local_mem_info(kfd->adev, + &node->local_mem_info, node->xcp->id); + /* Initialize the KFD node */ if 
(kfd_init_node(node)) { dev_err(kfd_device, "Error initializing KFD node\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 44f4d5509db6..3bd222e8f6c3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -313,6 +313,8 @@ struct kfd_node { unsigned int compute_vmid_bitmap; + struct kfd_local_mem_info local_mem_info; + struct kfd_dev *kfd; }; @@ -335,7 +337,6 @@ struct kfd_dev { */ struct kgd2kfd_shared_resources shared_resources; - struct kfd_local_mem_info local_mem_info; const struct kfd2kgd_calls *kfd2kgd; struct mutex doorbell_mutex; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index dbb6159344b3..e0bacf017a40 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1152,8 +1152,8 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) if (!gpu) return 0; - local_mem_size = gpu->kfd->local_mem_info.local_mem_size_private + - gpu->kfd->local_mem_info.local_mem_size_public; + local_mem_size = gpu->local_mem_info.local_mem_size_private + + gpu->local_mem_info.local_mem_size_public; buf[0] = gpu->adev->pdev->devfn; buf[1] = gpu->adev->pdev->subsystem_vendor | (gpu->adev->pdev->subsystem_device << 16); @@ -1234,7 +1234,8 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) * for APUs - If CRAT from ACPI reports more than one bank, then * all the banks will report the same mem_clk_max information */ - amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info); + amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info, + dev->gpu->xcp->id); list_for_each_entry(mem, &dev->mem_props, list) mem->mem_clk_max = local_mem_info.mem_clk_max; -- 2.40.1