[AMD Official Use Only - AMD Internal Distribution Only] Series is Reviewed-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> Regards, Hawking -----Original Message----- From: Lazar, Lijo <Lijo.Lazar@xxxxxxx> Sent: Tuesday, May 14, 2024 16:36 To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Koenig, Christian <Christian.Koenig@xxxxxxx>; Ma, Le <Le.Ma@xxxxxxx>; Ma, Le <Le.Ma@xxxxxxx> Subject: [PATCH 3/3] drm/amdgpu: Use NPS ranges from discovery table Add GMC API to fetch NPS range information from discovery table. Use NPS range information in GMC 9.4.3 SOCs when available, otherwise fallback to software method. Signed-off-by: Lijo Lazar <lijo.lazar@xxxxxxx> Reviewed-by: Le Ma <le.ma@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 92 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 76 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 11 +++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 40 +++++--- 5 files changed, 212 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 43528ff50e72..afe8d12667f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -367,6 +367,35 @@ static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev) } } +static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev, + struct binary_header *bhdr) +{ + struct table_info *info; + uint16_t checksum; + uint16_t offset; + + info = &bhdr->table_list[NPS_INFO]; + offset = le16_to_cpu(info->offset); + checksum = le16_to_cpu(info->checksum); + + struct nps_info_header *nhdr = + (struct nps_info_header *)(adev->mman.discovery_bin + offset); + + if (le32_to_cpu(nhdr->table_id) != NPS_INFO_TABLE_ID) { + dev_dbg(adev->dev, "invalid ip discovery nps info table id\n"); + return -EINVAL; + } + + if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, + le32_to_cpu(nhdr->size_bytes), + checksum)) { + dev_dbg(adev->dev, "invalid nps info data table checksum\n"); + return -EINVAL; + } + + return 0; +} + static int amdgpu_discovery_init(struct amdgpu_device *adev) { struct table_info *info; @@ -1681,6 +1710,69 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev) return 0; } +union nps_info { + struct nps_info_v1_0 v1; +}; + +int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev, + uint32_t *nps_type, + struct amdgpu_gmc_memrange **ranges, + int *range_cnt) +{ + struct amdgpu_gmc_memrange *mem_ranges; + struct binary_header *bhdr; + union nps_info *nps_info; + u16 offset; + int i; + + if (!nps_type || !range_cnt || !ranges) + return -EINVAL; + + if (!adev->mman.discovery_bin) { + dev_err(adev->dev, + "fetch mem range failed, ip discovery uninitialized\n"); + return -EINVAL; + } + + bhdr = (struct binary_header *)adev->mman.discovery_bin; + offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset); + + if (!offset) + return -ENOENT; + + /* If verification fails, return as if NPS table doesn't exist */ + if (amdgpu_discovery_verify_npsinfo(adev, bhdr)) + return -ENOENT; + + nps_info = (union nps_info *)(adev->mman.discovery_bin + offset); + + switch (le16_to_cpu(nps_info->v1.header.version_major)) { + case 1: + *nps_type = nps_info->v1.nps_type; + *range_cnt = nps_info->v1.count; + mem_ranges = kvzalloc( + *range_cnt * sizeof(struct amdgpu_gmc_memrange), + GFP_KERNEL); + for (i = 0; i < *range_cnt; i++) { + mem_ranges[i].base_address = + nps_info->v1.instance_info[i].base_address; + mem_ranges[i].limit_address = + nps_info->v1.instance_info[i].limit_address; + mem_ranges[i].nid_mask = -1; + mem_ranges[i].flags = 0; + } + *ranges = mem_ranges; + break; + default: + dev_err(adev->dev, "Unhandled NPS info table %d.%d\n", + le16_to_cpu(nps_info->v1.header.version_major), + le16_to_cpu(nps_info->v1.header.version_minor)); + return -EINVAL; + } + + return 0; +} + static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) { /* what IP to use for this? */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index 4d03cd5b3410..f5d36525ec3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -30,4 +30,9 @@ void amdgpu_discovery_fini(struct amdgpu_device *adev); int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev); +int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev, + uint32_t *nps_type, + struct amdgpu_gmc_memrange **ranges, + int *range_cnt); + #endif /* __AMDGPU_DISCOVERY__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index f8ed886ffca3..78cd31e929c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -1165,3 +1165,79 @@ void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev) { device_remove_file(adev->dev, &dev_attr_current_memory_partition); } + +int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev, + struct amdgpu_mem_partition_info *mem_ranges, + int exp_ranges) +{ + struct amdgpu_gmc_memrange *ranges; + int range_cnt, ret, i, j; + uint32_t nps_type; + + if (!mem_ranges) + return -EINVAL; + + ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges, + &range_cnt); + + if (ret) + return ret; + + /* TODO: For now, expect ranges and partition count to be the same. + * Adjust if there are holes expected in any NPS domain. + */ + if (range_cnt != exp_ranges) { + dev_warn( + adev->dev, + "NPS config mismatch - expected ranges: %d discovery - nps mode: %d, nps ranges: %d", + exp_ranges, nps_type, range_cnt); + ret = -EINVAL; + goto err; + } + + for (i = 0; i < exp_ranges; ++i) { + if (ranges[i].base_address >= ranges[i].limit_address) { + dev_warn( + adev->dev, + "Invalid NPS range - nps mode: %d, range[%d]: base: %llx limit: %llx", + nps_type, i, ranges[i].base_address, + ranges[i].limit_address); + ret = -EINVAL; + goto err; + } + + /* Check for overlaps, not expecting any now */ + for (j = i - 1; j >= 0; j--) { + if (max(ranges[j].base_address, + ranges[i].base_address) <= + min(ranges[j].limit_address, + ranges[i].limit_address)) { + dev_warn( + adev->dev, + "overlapping ranges detected [ %llx - %llx ] | [%llx - %llx]", + ranges[j].base_address, + ranges[j].limit_address, + ranges[i].base_address, + ranges[i].limit_address); + ret = -EINVAL; + goto err; + } + } + + mem_ranges[i].range.fpfn = + (ranges[i].base_address - + adev->vm_manager.vram_base_offset) >> + AMDGPU_GPU_PAGE_SHIFT; + mem_ranges[i].range.lpfn = + (ranges[i].limit_address - + adev->vm_manager.vram_base_offset) >> + AMDGPU_GPU_PAGE_SHIFT; + mem_ranges[i].size = + ranges[i].limit_address - ranges[i].base_address + 1; + } + +err: + kfree(ranges); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 17f40ea1104b..febca3130497 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -199,6 +199,13 @@ struct amdgpu_mem_partition_info { #define INVALID_PFN -1 +struct amdgpu_gmc_memrange { + uint64_t base_address; + uint64_t limit_address; + uint32_t flags; + int nid_mask; +}; + enum amdgpu_gart_placement { AMDGPU_GART_PLACEMENT_BEST_FIT = 0, AMDGPU_GART_PLACEMENT_HIGH, @@ -439,4 +446,8 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev); int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev); void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev); +int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev, + struct amdgpu_mem_partition_info *mem_ranges, + int exp_ranges); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 7c4e2adae7b3..094c08cb98e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1895,7 +1895,7 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev, { enum amdgpu_memory_partition mode; u32 start_addr = 0, size; - int i; + int i, r, l; mode = gmc_v9_0_query_memory_partition(adev); @@ -1918,23 +1918,39 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev, break; } - size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT; - size /= adev->gmc.num_mem_partitions; + /* Use NPS range info, if populated */ + r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges, + adev->gmc.num_mem_partitions); + if (!r) { + l = 0; + for (i = 1; i < adev->gmc.num_mem_partitions; ++i) { + if (mem_ranges[i].range.lpfn > + mem_ranges[i - 1].range.lpfn) + l = i; + } + + } else { + /* Fallback to sw based calculation */ + size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT; + size /= adev->gmc.num_mem_partitions; + + for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { + mem_ranges[i].range.fpfn = start_addr; + mem_ranges[i].size = + ((u64)size << AMDGPU_GPU_PAGE_SHIFT); + mem_ranges[i].range.lpfn = start_addr + size - 1; + start_addr += size; + } - for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { - mem_ranges[i].range.fpfn = start_addr; - mem_ranges[i].size = ((u64)size << AMDGPU_GPU_PAGE_SHIFT); - mem_ranges[i].range.lpfn = start_addr + size - 1; - start_addr += size; + l = adev->gmc.num_mem_partitions - 1; } /* Adjust the last one */ - mem_ranges[adev->gmc.num_mem_partitions - 1].range.lpfn = + mem_ranges[l].range.lpfn = (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1; - mem_ranges[adev->gmc.num_mem_partitions - 1].size = + mem_ranges[l].size = adev->gmc.real_vram_size - - ((u64)mem_ranges[adev->gmc.num_mem_partitions - 1].range.fpfn - << AMDGPU_GPU_PAGE_SHIFT); + ((u64)mem_ranges[l].range.fpfn << AMDGPU_GPU_PAGE_SHIFT); } static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev) -- 2.25.1