On 10/29/2022 4:17 AM, Felix Kuehling wrote: > On 2022-10-27 04:14, Ma Jun wrote: >> For some GPUs with more CUs, the original sibling_map[32] >> >> in struct crat_subtype_cache is not enough >> >> to save the cache information when create the VCRAT table, >> >> so skip filling the struct crat_subtype_cache info instead >> >> fill struct kfd_cache_properties directly to fix this problem. >> >> v3: >> - Drop processor id calc function >> v2: >> - Remove unnecessary sys interface "cache_ext" >> >> Signed-off-by: Ma Jun <Jun.Ma2@xxxxxxx> >> --- >> drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 307 +++------------------- >> drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 12 + >> drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 238 ++++++++++++++++- >> drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 5 +- >> 4 files changed, 278 insertions(+), 284 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c >> index d25ac9cbe5b2..8b7e34b45740 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c > [snip] >> +int get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info) >> { >> - struct kfd_gpu_cache_info *pcache_info; >> struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; >> int num_of_cache_types = 0; >> - int i, j, k; >> - int ct = 0; >> - int mem_available = available_size; >> - unsigned int cu_processor_id; >> - int ret; >> - unsigned int num_cu_shared; >> >> switch (kdev->adev->asic_type) { > [snip] >> >> default: >> switch (KFD_GC_VERSION(kdev)) { > [snip] >> case IP_VERSION(11, 0, 0): >> case IP_VERSION(11, 0, 1): >> case IP_VERSION(11, 0, 2): >> case IP_VERSION(11, 0, 3): >> - pcache_info = cache_info; >> + *pcache_info = cache_info; > > This won't work. cache_info is a local variable. It will be out of scope > as soon as this function returns. You'll need to allocate this in some > data structure that will persist after the function returns. Maybe > expect the caller to pass in a pointer to an array in their own stack frame. Yes, this is my mistake. Will fix in next version > > >> num_of_cache_types = >> - kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info); >> + kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info); > [snip] >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c >> index e0680d265a66..dc231e248258 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > > [snip] > > > int kfd_topology_add_device(struct kfd_dev *gpu) >> { >> uint32_t gpu_id; >> @@ -1759,6 +1970,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) >> topology_crat_proximity_domain--; >> return res; >> } >> + >> res = kfd_parse_crat_table(crat_image, >> &temp_topology_device_list, >> proximity_domain); >> @@ -1771,23 +1983,31 @@ int kfd_topology_add_device(struct kfd_dev *gpu) >> >> kfd_topology_update_device_list(&temp_topology_device_list, >> &topology_device_list); >> + up_write(&topology_lock); > > I'm not sure if dropping and re-taking the topology lock here could lead > to race conditions. But this could be avoided, if you moved the > responsibility for topology locking out of kfd_assign_gpu and into the > caller (kfd_topology_add_device). Thanks for review.Will fix in next version. Regards, Ma Jun > > Regards, > Felix > > >> + >> + dev = kfd_assign_gpu(gpu); >> + if (WARN_ON(!dev)) { >> + res = -ENODEV; >> + goto err; >> + } >> + >> + down_write(&topology_lock); >> + >> + /* Fill the cache affinity information here for the GPUs >> + * using VCRAT >> + */ >> + kfd_fill_cache_non_crat_info(dev, gpu); >> >> /* Update the SYSFS tree, since we added another topology >> * device >> */ >> res = kfd_topology_update_sysfs(); >> up_write(&topology_lock); >> - >> if (!res) >> sys_props.generation_count++; >> else >> pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n", >> gpu_id, res); >> - dev = kfd_assign_gpu(gpu); >> - if (WARN_ON(!dev)) { >> - res = -ENODEV; >> - goto err; >> - } >> } >> >> dev->gpu_id = gpu_id; >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h >> index dc4e239c8f8f..3e8ac87f0ac9 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h >> @@ -87,6 +87,8 @@ struct kfd_mem_properties { >> struct attribute attr_used; >> }; >> >> +#define CACHE_SIBLINGMAP_SIZE 64 >> + >> struct kfd_cache_properties { >> struct list_head list; >> uint32_t processor_id_low; >> @@ -97,10 +99,11 @@ struct kfd_cache_properties { >> uint32_t cache_assoc; >> uint32_t cache_latency; >> uint32_t cache_type; >> - uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; >> + uint8_t sibling_map[CACHE_SIBLINGMAP_SIZE]; >> struct kfd_dev *gpu; >> struct kobject *kobj; >> struct attribute attr; >> + uint32_t sibling_map_size; >> }; >> >> struct kfd_iolink_properties {