[AMD Official Use Only - General] > -----Original Message----- > From: Kuehling, Felix <Felix.Kuehling@xxxxxxx> > Sent: Thursday, September 7, 2023 4:51 PM > To: Joshi, Mukul <Mukul.Joshi@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Subject: Re: [PATCHv2 2/4] drm/amdkfd: Update cache info reporting for GFX > v9.4.3 > > > On 2023-09-06 11:44, Mukul Joshi wrote: > > Update cache info reporting in sysfs to report the correct number of > > CUs and associated cache information based on different spatial > > partitioning modes. > > > > Signed-off-by: Mukul Joshi <mukul.joshi@xxxxxxx> > > --- > > v1->v2: > > - Revert the change in kfd_crat.c > > - Add a comment to not change value of CRAT_SIBLINGMAP_SIZE. > > > > drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 4 ++ > > drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 82 +++++++++++++------- > --- > > drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 2 +- > > 3 files changed, 51 insertions(+), 37 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h > > b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h > > index 387a8ef49385..74c2d7a0d628 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h > > @@ -79,6 +79,10 @@ struct crat_header { > > #define CRAT_SUBTYPE_IOLINK_AFFINITY 5 > > #define CRAT_SUBTYPE_MAX 6 > > > > +/* > > + * Do not change the value of CRAT_SIBLINGMAP_SIZE from 32 > > + * as it breaks the ABI. > > + */ > > #define CRAT_SIBLINGMAP_SIZE 32 > > > > /* > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > > b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > > index c54795682dfb..b98cc7930e4c 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > > @@ -1596,14 +1596,17 @@ static int fill_in_l1_pcache(struct > kfd_cache_properties **props_ext, > > static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, > > struct kfd_gpu_cache_info *pcache_info, > > struct kfd_cu_info *cu_info, > > - int cache_type, unsigned int cu_processor_id) > > + int cache_type, unsigned int cu_processor_id, > > + struct kfd_node *knode) > > { > > unsigned int cu_sibling_map_mask; > > int first_active_cu; > > - int i, j, k; > > + int i, j, k, xcc, start, end; > > struct kfd_cache_properties *pcache = NULL; > > > > - cu_sibling_map_mask = cu_info->cu_bitmap[0][0][0]; > > + start = ffs(knode->xcc_mask) - 1; > > + end = start + NUM_XCC(knode->xcc_mask); > > + cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0]; > > cu_sibling_map_mask &= > > ((1 << pcache_info[cache_type].num_cu_shared) - 1); > > first_active_cu = ffs(cu_sibling_map_mask); @@ -1638,16 +1641,18 > @@ > > static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, > > cu_sibling_map_mask = cu_sibling_map_mask >> > (first_active_cu - 1); > > k = 0; > > > > - for (i = 0; i < cu_info->num_shader_engines; i++) { > > - for (j = 0; j < cu_info- > >num_shader_arrays_per_engine; j++) { > > - pcache->sibling_map[k] = > (uint8_t)(cu_sibling_map_mask & 0xFF); > > - pcache->sibling_map[k+1] = > (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); > > - pcache->sibling_map[k+2] = > (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); > > - pcache->sibling_map[k+3] = > (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); > > - k += 4; > > - > > - cu_sibling_map_mask = cu_info- > >cu_bitmap[0][i % 4][j + i / 4]; > > - cu_sibling_map_mask &= ((1 << > pcache_info[cache_type].num_cu_shared) - 1); > > + for (xcc = start; xcc < end; xcc++) { > > + for (i = 0; i < cu_info->num_shader_engines; i++) { > > + for (j = 0; j < cu_info- > >num_shader_arrays_per_engine; j++) { > > + pcache->sibling_map[k] = > (uint8_t)(cu_sibling_map_mask & 0xFF); > > + pcache->sibling_map[k+1] = > (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); > > + pcache->sibling_map[k+2] = > (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); > > + pcache->sibling_map[k+3] = > (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); > > + k += 4; > > + > > + cu_sibling_map_mask = cu_info- > >cu_bitmap[start][i % 4][j + i / > > +4]; > > Shouldn't this use "xcc" as index instead of "start"? > Yes that’s correct. Thanks for the catch. I will fix this before committing. > Other than that, this patch is > > Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> > > P.S.: This whole function looks suspicious to me the way it exposes > active/inactive CUs to user mode. I think user mode only wants to count active > CUs in the sibling mask because it has no other information about inactive > CUs. And reading the cu_info into cu_sibling_map_mask and then only using it > in the next loop iteration is probably also wrong. It basically throws away > cu_info from the last xcc,SE,SA. Or maybe I just don't understand what this > function is trying to do. > Yes the function is a bit convoluted to understand. It starts with the following code before entering the loop. cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0]; cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); ... cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); ... It again re-uses cu_info->cu_bitmap[start][0][0]; in the second iteration. I am not sure if it is doing the right thing either. I will try to take a look at it and understand what exactly we want to achieve with this function. Regards, Mukul > > > + cu_sibling_map_mask &= ((1 << > pcache_info[cache_type].num_cu_shared) - 1); > > + } > > } > > } > > pcache->sibling_map_size = k; > > @@ -1665,7 +1670,7 @@ static int fill_in_l2_l3_pcache(struct > kfd_cache_properties **props_ext, > > static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, > struct kfd_node *kdev) > > { > > struct kfd_gpu_cache_info *pcache_info = NULL; > > - int i, j, k; > > + int i, j, k, xcc, start, end; > > int ct = 0; > > unsigned int cu_processor_id; > > int ret; > > @@ -1699,37 +1704,42 @@ static void kfd_fill_cache_non_crat_info(struct > kfd_topology_device *dev, struct > > * then it will consider only one CU from > > * the shared unit > > */ > > + start = ffs(kdev->xcc_mask) - 1; > > + end = start + NUM_XCC(kdev->xcc_mask); > > + > > for (ct = 0; ct < num_of_cache_types; ct++) { > > cu_processor_id = gpu_processor_id; > > if (pcache_info[ct].cache_level == 1) { > > - for (i = 0; i < pcu_info->num_shader_engines; i++) { > > - for (j = 0; j < pcu_info- > >num_shader_arrays_per_engine; j++) { > > - for (k = 0; k < pcu_info- > >num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { > > - > > - ret = > fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, > > - > pcu_info->cu_bitmap[0][i % 4][j + i / 4], ct, > > - > cu_processor_id, k); > > - > > - if (ret < 0) > > - break; > > - > > - if (!ret) { > > - num_of_entries++; > > - > list_add_tail(&props_ext->list, &dev->cache_props); > > + for (xcc = start; xcc < end; xcc++) { > > + for (i = 0; i < pcu_info->num_shader_engines; > i++) { > > + for (j = 0; j < pcu_info- > >num_shader_arrays_per_engine; j++) { > > + for (k = 0; k < pcu_info- > >num_cu_per_sh; k += > > +pcache_info[ct].num_cu_shared) { > > + > > + ret = > fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, > > + > pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct, > > + > cu_processor_id, k); > > + > > + if (ret < 0) > > + break; > > + > > + if (!ret) { > > + > num_of_entries++; > > + > list_add_tail(&props_ext->list, &dev->cache_props); > > + } > > + > > + /* Move to next CU > block */ > > + num_cu_shared = ((k > + pcache_info[ct].num_cu_shared) <= > > + pcu_info- > >num_cu_per_sh) ? > > + > pcache_info[ct].num_cu_shared : > > + (pcu_info- > >num_cu_per_sh - k); > > + cu_processor_id += > num_cu_shared; > > } > > - > > - /* Move to next CU block */ > > - num_cu_shared = ((k + > pcache_info[ct].num_cu_shared) <= > > - pcu_info- > >num_cu_per_sh) ? > > - > pcache_info[ct].num_cu_shared : > > - (pcu_info- > >num_cu_per_sh - k); > > - cu_processor_id += > num_cu_shared; > > } > > } > > } > > } else { > > ret = fill_in_l2_l3_pcache(&props_ext, pcache_info, > > - pcu_info, ct, > cu_processor_id); > > + pcu_info, ct, cu_processor_id, kdev); > > > > if (ret < 0) > > break; > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h > > b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h > > index dea32a9e5506..27386ce9a021 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h > > @@ -89,7 +89,7 @@ struct kfd_mem_properties { > > struct attribute attr; > > }; > > > > -#define CACHE_SIBLINGMAP_SIZE 64 > > +#define CACHE_SIBLINGMAP_SIZE 128 > > > > struct kfd_cache_properties { > > struct list_head list;