[AMD Public Use]

Some comments inline. Don't address them yet until someone else reviews this more thoroughly, though.

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Mike Li
> Sent: Monday, March 29, 2021 12:33 PM
> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>; Lazar, Lijo <Lijo.Lazar@xxxxxxx>; Li, Mike
> (Tianxin) <Tianxinmike.Li@xxxxxxx>; Koenig, Christian <Christian.Koenig@xxxxxxx>;
> Zhang, Hawking <Hawking.Zhang@xxxxxxx>
> Subject: [PATCH] drm/amdkfd: Update L1 and add L2/3 cache information
>
> The L1 cache information has been updated and the L2/L3
> information has been added. The changes have been made
> for Vega10 and newer ASICs. There are no changes
> for ASICs older than Vega10.
>
> BUG: SWDEV-260249

[KR] Take this out, the upstream community doesn't need to know about our BUG IDs

>
> Signed-off-by: Mike Li <Tianxinmike.Li@xxxxxxx>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 749 ++++++++++++++++++++++++--
> 1 file changed, 699 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index c60e82697385..eb30324393a8 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -55,7 +55,7 @@ struct kfd_gpu_cache_info {
> uint32_t cache_level;
> uint32_t flags;
> /* Indicates how many Compute Units share this cache
> - * Value = 1 indicates the cache is not shared
> + * within a SA. Value = 1 indicates the cache is not shared
> */
> uint32_t num_cu_shared;
> };
> @@ -69,7 +69,6 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
> CRAT_CACHE_FLAGS_DATA_CACHE |
> CRAT_CACHE_FLAGS_SIMD_CACHE),
> .num_cu_shared = 1,
> -

[KR] Unrelated whitespace removal
Kent

> },
> {
> /* Scalar L1 Instruction Cache (in SQC module) per bank */
> @@ -126,9 +125,6 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
> /* TODO: Add L2 Cache information */
> };
>
> -/* NOTE: In future if more information is added to struct kfd_gpu_cache_info
> - * the following ASICs may need a separate table.
> - */
> #define hawaii_cache_info kaveri_cache_info
> #define tonga_cache_info carrizo_cache_info
> #define fiji_cache_info carrizo_cache_info
> @@ -136,13 +132,562 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
> #define polaris11_cache_info carrizo_cache_info
> #define polaris12_cache_info carrizo_cache_info
> #define vegam_cache_info carrizo_cache_info
> -/* TODO - check & update Vega10 cache details */
> -#define vega10_cache_info carrizo_cache_info
> -#define raven_cache_info carrizo_cache_info
> -#define renoir_cache_info carrizo_cache_info
> -/* TODO - check & update Navi10 cache details */
> -#define navi10_cache_info carrizo_cache_info
> -#define vangogh_cache_info carrizo_cache_info
> +
> +/* NOTE: L1 cache information has been updated and L2/L3
> + * cache information has been added for Vega10 and
> + * newer ASICs. The unit for cache_size is KiB.
> + * In the future, cache details need to be checked
> + * and updated for every new ASIC.
> + */
> +
> +static struct kfd_gpu_cache_info vega10_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 4096,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 16,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info raven_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 1024,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 11,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info renoir_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 1024,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 8,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info vega12_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 2048,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 5,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info vega20_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 3,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 8192,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 16,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 8192,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 14,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info navi10_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* GL1 Data Cache per SA */
> + .cache_size = 128,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 4096,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info vangogh_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* GL1 Data Cache per SA */
> + .cache_size = 128,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 8,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 1024,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 8,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info navi14_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* GL1 Data Cache per SA */
> + .cache_size = 128,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 12,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 2048,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 12,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* GL1 Data Cache per SA */
> + .cache_size = 128,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 4096,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> + {
> + /* L3 Data Cache per GPU */
> + .cache_size = 128*1024,
> + .cache_level = 3,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* GL1 Data Cache per SA */
> + .cache_size = 128,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 3072,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> + {
> + /* L3 Data Cache per GPU */
> + .cache_size = 96*1024,
> + .cache_level = 3,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 10,
> + },
> +};
> +
> +static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
> + {
> + /* TCP L1 Cache per CU */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 1,
> + },
> + {
> + /* Scalar L1 Instruction Cache per SQC */
> + .cache_size = 32,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_INST_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* Scalar L1 Data Cache per SQC */
> + .cache_size = 16,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 2,
> + },
> + {
> + /* GL1 Data Cache per SA */
> + .cache_size = 128,
> + .cache_level = 1,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 8,
> + },
> + {
> + /* L2 Data Cache per GPU (Total Tex Cache) */
> + .cache_size = 2048,
> + .cache_level = 2,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 8,
> + },
> + {
> + /* L3 Data Cache per GPU */
> + .cache_size = 32*1024,
> + .cache_level = 3,
> + .flags = (CRAT_CACHE_FLAGS_ENABLED |
> + CRAT_CACHE_FLAGS_DATA_CACHE |
> + CRAT_CACHE_FLAGS_SIMD_CACHE),
> + .num_cu_shared = 8,
> + },
> +};
>
> static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
> struct crat_subtype_computeunit *cu)
> @@ -544,7 +1089,7 @@ int kfd_parse_crat_table(void *crat_image, struct list_head
> *device_list,
> }
>
> /* Helper function. See kfd_fill_gpu_cache_info for parameter description */
> -static int fill_in_pcache(struct crat_subtype_cache *pcache,
> +static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,
> struct kfd_gpu_cache_info *pcache_info,
> struct kfd_cu_info *cu_info,
> int mem_available,
> @@ -597,6 +1142,70 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache,
> return 1;
> }
>
> +/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
> +static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
> + struct kfd_gpu_cache_info *pcache_info,
> + struct kfd_cu_info *cu_info,
> + int mem_available,
> + int cache_type, unsigned int cu_processor_id)
> +{
> + unsigned int cu_sibling_map_mask;
> + int first_active_cu;
> + int i, j, k;
> +
> + /* First check if enough memory is available */
> + if (sizeof(struct crat_subtype_cache) > mem_available)
> + return -ENOMEM;
> +
> + cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
> + cu_sibling_map_mask &=
> + ((1 << pcache_info[cache_type].num_cu_shared) - 1);
> + first_active_cu = ffs(cu_sibling_map_mask);
> +
> + /* A CU could be inactive. In case of a shared cache, find the first
> + * active CU; in case of a non-shared cache, check whether the CU is
> + * inactive and, if so, skip it.
> + */
> + if (first_active_cu) {
> + memset(pcache, 0, sizeof(struct crat_subtype_cache));
> + pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
> + pcache->length = sizeof(struct crat_subtype_cache);
> + pcache->flags = pcache_info[cache_type].flags;
> + pcache->processor_id_low = cu_processor_id
> + + (first_active_cu - 1);
> + pcache->cache_level = pcache_info[cache_type].cache_level;
> + pcache->cache_size = pcache_info[cache_type].cache_size;
> +
> + /* Sibling map is w.r.t processor_id_low, so shift out
> + * inactive CU
> + */
> + cu_sibling_map_mask =
> + cu_sibling_map_mask >> (first_active_cu - 1);
> + k = 0;
> + for (i = 0; i < cu_info->num_shader_engines; i++) {
> + for (j = 0; j < cu_info->num_shader_arrays_per_engine;
> + j++) {
> + pcache->sibling_map[k] =
> + (uint8_t)(cu_sibling_map_mask & 0xFF);
> + pcache->sibling_map[k+1] =
> + (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
> + pcache->sibling_map[k+2] =
> + (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
> + pcache->sibling_map[k+3] =
> + (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
> + k += 4;
> + cu_sibling_map_mask =
> + cu_info->cu_bitmap[i % 4][j + i / 4];
> + cu_sibling_map_mask &= (
> + (1 << pcache_info[cache_type].num_cu_shared)
> + - 1);
> + }
> + }
> + return 0;
> + }
> + return 1;
> +}
> +
> /* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
> * tables
> *
> @@ -624,6 +1233,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
> int mem_available = available_size;
> unsigned int cu_processor_id;
> int ret;
> + unsigned int num_cu_shared;
>
> switch (kdev->device_info->asic_family) {
> case CHIP_KAVERI:
> @@ -663,12 +1273,21 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
> num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
> break;
> case CHIP_VEGA10:
> + pcache_info = vega10_cache_info;
> + num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
> + break;
> case CHIP_VEGA12:
> + pcache_info = vega12_cache_info;
> + num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
> + break;
> case CHIP_VEGA20:
> case CHIP_ARCTURUS:
> + pcache_info = vega20_cache_info;
> + num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
> + break;
> case CHIP_ALDEBARAN:
> - pcache_info = vega10_cache_info;
> - num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
> + pcache_info = aldebaran_cache_info;
> + num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
> break;
> case CHIP_RAVEN:
> pcache_info = raven_cache_info;
> @@ -680,12 +1299,24 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
> break;
> case CHIP_NAVI10:
> case CHIP_NAVI12:
> + pcache_info = navi10_cache_info;
> + num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
> + break;
> case CHIP_NAVI14:
> + pcache_info = navi14_cache_info;
> + num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
> + break;
> case CHIP_SIENNA_CICHLID:
> + pcache_info = sienna_cichlid_cache_info;
> + num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
> + break;
> case CHIP_NAVY_FLOUNDER:
> + pcache_info = navy_flounder_cache_info;
> + num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
> + break;
> case CHIP_DIMGREY_CAVEFISH:
> - pcache_info = navi10_cache_info;
> - num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
> + pcache_info = dimgrey_cavefish_cache_info;
> + num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
> break;
> case CHIP_VANGOGH:
> pcache_info = vangogh_cache_info;
> @@ -709,40 +1340,58 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
> */
>
> for (ct = 0; ct < num_of_cache_types; ct++) {
> - cu_processor_id = gpu_processor_id;
> - for (i = 0; i < cu_info->num_shader_engines; i++) {
> - for (j = 0; j < cu_info->num_shader_arrays_per_engine;
> - j++) {
> - for (k = 0; k < cu_info->num_cu_per_sh;
> - k += pcache_info[ct].num_cu_shared) {
> -
> - ret = fill_in_pcache(pcache,
> - pcache_info,
> - cu_info,
> - mem_available,
> - cu_info->cu_bitmap[i % 4][j + i / 4],
> - ct,
> - cu_processor_id,
> - k);
> -
> - if (ret < 0)
> - break;
> -
> - if (!ret) {
> - pcache++;
> - (*num_of_entries)++;
> - mem_available -=
> - sizeof(*pcache);
> - (*size_filled) +=
> - sizeof(*pcache);
> - }
> -
> - /* Move to next CU block */
> - cu_processor_id +=
> - pcache_info[ct].num_cu_shared;
> - }
> - }
> + cu_processor_id = gpu_processor_id;
> + if (pcache_info[ct].cache_level == 1) {
> + for (i = 0; i < cu_info->num_shader_engines; i++) {
> + for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
> + for (k = 0; k < cu_info->num_cu_per_sh;
> + k += pcache_info[ct].num_cu_shared) {
> + ret = fill_in_l1_pcache(pcache,
> + pcache_info,
> + cu_info,
> + mem_available,
> + cu_info->cu_bitmap[i % 4][j + i / 4],
> + ct,
> + cu_processor_id,
> + k);
> +
> + if (ret < 0)
> + break;
> +
> + if (!ret) {
> + pcache++;
> + (*num_of_entries)++;
> + mem_available -= sizeof(*pcache);
> + (*size_filled) += sizeof(*pcache);
> + }
> +
> + /* Move to next CU block */
> + num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
> + cu_info->num_cu_per_sh) ?
> + pcache_info[ct].num_cu_shared :
> + (cu_info->num_cu_per_sh - k);
> + cu_processor_id += num_cu_shared;
> }
> + }
> + }
> + } else {
> + ret = fill_in_l2_l3_pcache(pcache,
> + pcache_info,
> + cu_info,
> + mem_available,
> + ct,
> + cu_processor_id);
> +
> + if (ret < 0)
> + break;
> +
> + if (!ret) {
> + pcache++;
> + (*num_of_entries)++;
> + mem_available -= sizeof(*pcache);
> + (*size_filled) += sizeof(*pcache);
> + }
> + }
> }
>
> pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
> --
> 2.25.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
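For reference, the sibling-map packing that the new fill_in_l2_l3_pcache() helper performs for a shared L2/L3 cache can be exercised on its own. The sketch below mirrors the loop from the patch in ordinary userspace C; the shader-engine and shader-array counts, the cu_bitmap contents and the num_cu_shared value are made-up example inputs, not values taken from any ASIC table above.

/*
 * Standalone sketch (not kernel code): mimics how fill_in_l2_l3_pcache()
 * packs the CU bitmap into the CRAT sibling_map bytes for a cache shared
 * by num_cu_shared CUs. All input values below are illustrative.
 */
#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* ffs() */

#define NUM_SE 2	/* assumed number of shader engines */
#define NUM_SA 2	/* assumed shader arrays per engine */

int main(void)
{
	/* Example CU bitmap: bit n set means CU n of that SE/SA is active */
	uint32_t cu_bitmap[4][4] = {
		{ 0x3ff, 0x3ff, 0, 0 },
		{ 0x3fe, 0x3ff, 0, 0 },
	};
	unsigned int num_cu_shared = 10;	/* e.g. a cache shared by 10 CUs */
	uint8_t sibling_map[32] = { 0 };
	uint32_t mask = cu_bitmap[0][0] & ((1u << num_cu_shared) - 1);
	int first_active_cu = ffs(mask);
	int i, j, k = 0;

	if (!first_active_cu)
		return 0;	/* no active CU shares this cache */

	/*
	 * The sibling map is relative to processor_id_low, so drop the
	 * leading inactive CUs, then emit four bytes per shader array in
	 * the same i/j order (and with the same "use the mask loaded on
	 * the previous pass" behaviour) as the kernel helper.
	 */
	mask >>= first_active_cu - 1;
	for (i = 0; i < NUM_SE; i++) {
		for (j = 0; j < NUM_SA; j++) {
			sibling_map[k++] = (uint8_t)(mask & 0xFF);
			sibling_map[k++] = (uint8_t)((mask >> 8) & 0xFF);
			sibling_map[k++] = (uint8_t)((mask >> 16) & 0xFF);
			sibling_map[k++] = (uint8_t)((mask >> 24) & 0xFF);
			mask = cu_bitmap[i % 4][j + i / 4] &
			       ((1u << num_cu_shared) - 1);
		}
	}

	for (i = 0; i < k; i++)
		printf("sibling_map[%d] = 0x%02x\n", i, sibling_map[i]);

	return 0;
}

Built with a plain C compiler (for example, gcc sibling_map.c, a hypothetical file name), this prints the 16 sibling-map bytes produced for the assumed two-engine, two-array layout; each shader array whose CUs share the cache contributes four bytes, expressed relative to the first active CU.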