Hi Felix,

On 10/25/2022 5:06 AM, Felix Kuehling wrote:
>
> On 2022-10-24 07:26, Ma Jun wrote:
>> For some GPUs with more CUs, the original sibling_map[32] in
>> struct crat_subtype_cache is not big enough to hold the cache
>> information when creating the VCRAT table, so fill the cache info
>> into struct kfd_cache_properties_ext directly to fix the problem.
>>
>> At the same time, a new directory
>> "/sys/class/kfd/kfd/topology/nodes/*nodes_num*/caches_ext"
>> is created to show the cache information.
>
> Is this necessary because existing user mode cannot handle the larger
> sibling map? If so, you'll also need to update the Thunk to parse the
> information from caches_ext. Do you have a link to that patch?
>
> If user mode can handle a larger sibling map in the existing sysfs
> interface, we should not create a new one.
>
> Another alternative is to add more lines to the existing cache info
> sysfs entries. Older user mode will probably ignore the ones it doesn't
> know about.
>
Yes, it's not necessary to create a new directory for the cache
information. How about the code below?

+#define CACHE_INFO_FROM_CRAT	0x00000001
+#define CACHE_INFO_FROM_VCRAT	0x00000002
+
+#define VCRAT_SIBLINGMAP_SIZE	64
+
 struct kfd_cache_properties {
 	struct list_head list;
 	uint32_t processor_id_low;
@@ -98,9 +103,11 @@ struct kfd_cache_properties {
 	uint32_t cache_latency;
 	uint32_t cache_type;
 	uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE];
+	uint8_t sibling_map_ext[VCRAT_SIBLINGMAP_SIZE];
 	struct kfd_dev *gpu;
 	struct kobject *kobj;
 	struct attribute attr;
+	uint32_t flags;
 };

 	cache = container_of(attr, struct kfd_cache_properties, attr);
 	if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
 		return -EPERM;
@@ -402,12 +1142,22 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
 	sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
 	sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
 	sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
-	offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
-	for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
-		for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
-			/* Check each bit */
-			offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
-					 (cache->sibling_map[i] >> j) & 1);
+
+	if (cache->flags & CACHE_INFO_FROM_CRAT) {
+		offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
+		for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
+			for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
+				/* Check each bit */
+				offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
+						 (cache->sibling_map[i] >> j) & 1);
+	} else if (cache->flags & CACHE_INFO_FROM_VCRAT) {
+		offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
+		for (i = 0; i < VCRAT_SIBLINGMAP_SIZE; i++)
+			for (j = 0; j < sizeof(cache->sibling_map_ext[0])*8; j++)
+				/* Check each bit */
+				offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
+						 (cache->sibling_map_ext[i] >> j) & 1);
+	}

Regards,
Ma Jun

> Regards,
> Felix
>
>
>>
>> The original directory "cache" is reserved for GPUs which use a real
>> CRAT table.
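
Regarding the question about existing user mode: a parser that reads the
current "sibling_map" line up to the newline keeps working no matter how
many bytes the kernel prints, so a longer map by itself should not need a
separate caches_ext directory. As a minimal user-space sketch only (this
is not the Thunk's actual parser, and the node/cache indices in the path
are just an example), counting however many bits the kernel reports:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Example path only; node and cache indices vary per system */
	const char *path =
		"/sys/class/kfd/kfd/topology/nodes/1/caches/0/properties";
	char line[4096];
	int nbits = 0, nset = 0;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		char *p;

		if (strncmp(line, "sibling_map ", 12) != 0)
			continue;
		/* Count comma-separated bits until end of line, however
		 * many the kernel chose to print (256, 512, ... bits)
		 */
		for (p = line + 12; *p && *p != '\n'; p++) {
			if (*p == '0' || *p == '1') {
				nbits++;
				nset += (*p == '1');
			}
		}
	}
	fclose(f);
	printf("sibling_map: %d bits reported, %d set\n", nbits, nset);
	return 0;
}

If the Thunk behaves the same way, another option might be a single
sibling_map[] sized for the larger case plus a length field, so
kfd_cache_show() only needs one print loop instead of branching on
CACHE_INFO_FROM_CRAT/VCRAT. Just a thought, not a requirement.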
>> >> Signed-off-by: Ma Jun <Jun.Ma2@xxxxxxx> >> --- >> drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 1229 +------------------- >> drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1246 ++++++++++++++++++++- >> drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 21 + >> 3 files changed, 1261 insertions(+), 1235 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c >> index 4857ec5b9f46..e6928c60338e 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c >> @@ -30,799 +30,6 @@ >> #include "amdgpu.h" >> #include "amdgpu_amdkfd.h" >> >> -/* Static table to describe GPU Cache information */ >> -struct kfd_gpu_cache_info { >> - uint32_t cache_size; >> - uint32_t cache_level; >> - uint32_t flags; >> - /* Indicates how many Compute Units share this cache >> - * within a SA. Value = 1 indicates the cache is not shared >> - */ >> - uint32_t num_cu_shared; >> -}; >> - >> -static struct kfd_gpu_cache_info kaveri_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache (in SQC module) per bank */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* Scalar L1 Data Cache (in SQC module) per bank */ >> - .cache_size = 8, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - >> - /* TODO: Add L2 Cache information */ >> -}; >> - >> - >> -static struct kfd_gpu_cache_info carrizo_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache (in SQC module) per bank */ >> - .cache_size = 8, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 4, >> - }, >> - { >> - /* Scalar L1 Data Cache (in SQC module) per bank. */ >> - .cache_size = 4, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 4, >> - }, >> - >> - /* TODO: Add L2 Cache information */ >> -}; >> - >> -#define hawaii_cache_info kaveri_cache_info >> -#define tonga_cache_info carrizo_cache_info >> -#define fiji_cache_info carrizo_cache_info >> -#define polaris10_cache_info carrizo_cache_info >> -#define polaris11_cache_info carrizo_cache_info >> -#define polaris12_cache_info carrizo_cache_info >> -#define vegam_cache_info carrizo_cache_info >> - >> -/* NOTE: L1 cache information has been updated and L2/L3 >> - * cache information has been added for Vega10 and >> - * newer ASICs. The unit for cache_size is KiB. >> - * In future, check & update cache details >> - * for every new ASIC is required. 
>> - */ >> - >> -static struct kfd_gpu_cache_info vega10_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache per SQC */ >> - .cache_size = 32, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 3, >> - }, >> - { >> - /* Scalar L1 Data Cache per SQC */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 3, >> - }, >> - { >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - .cache_size = 4096, >> - .cache_level = 2, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 16, >> - }, >> -}; >> - >> -static struct kfd_gpu_cache_info raven_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache per SQC */ >> - .cache_size = 32, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 3, >> - }, >> - { >> - /* Scalar L1 Data Cache per SQC */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 3, >> - }, >> - { >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - .cache_size = 1024, >> - .cache_level = 2, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 11, >> - }, >> -}; >> - >> -static struct kfd_gpu_cache_info renoir_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache per SQC */ >> - .cache_size = 32, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 3, >> - }, >> - { >> - /* Scalar L1 Data Cache per SQC */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 3, >> - }, >> - { >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - .cache_size = 1024, >> - .cache_level = 2, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 8, >> - }, >> -}; >> - >> -static struct kfd_gpu_cache_info vega12_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache per SQC */ >> - .cache_size = 32, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | 
>> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 3, >> - }, >> - { >> - /* Scalar L1 Data Cache per SQC */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 3, >> - }, >> - { >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - .cache_size = 2048, >> - .cache_level = 2, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 5, >> - }, >> -}; >> - >> -static struct kfd_gpu_cache_info vega20_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache per SQC */ >> - .cache_size = 32, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 3, >> - }, >> - { >> - /* Scalar L1 Data Cache per SQC */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 3, >> - }, >> - { >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - .cache_size = 8192, >> - .cache_level = 2, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 16, >> - }, >> -}; >> - >> -static struct kfd_gpu_cache_info aldebaran_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache per SQC */ >> - .cache_size = 32, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* Scalar L1 Data Cache per SQC */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - .cache_size = 8192, >> - .cache_level = 2, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 14, >> - }, >> -}; >> - >> -static struct kfd_gpu_cache_info navi10_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache per SQC */ >> - .cache_size = 32, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* Scalar L1 Data Cache per SQC */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* GL1 Data Cache per SA */ >> - .cache_size = 128, >> - .cache_level = 1, >> - .flags = 
(CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 10, >> - }, >> - { >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - .cache_size = 4096, >> - .cache_level = 2, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 10, >> - }, >> -}; >> - >> -static struct kfd_gpu_cache_info vangogh_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache per SQC */ >> - .cache_size = 32, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* Scalar L1 Data Cache per SQC */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* GL1 Data Cache per SA */ >> - .cache_size = 128, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 8, >> - }, >> - { >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - .cache_size = 1024, >> - .cache_level = 2, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 8, >> - }, >> -}; >> - >> -static struct kfd_gpu_cache_info navi14_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache per SQC */ >> - .cache_size = 32, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* Scalar L1 Data Cache per SQC */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* GL1 Data Cache per SA */ >> - .cache_size = 128, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 12, >> - }, >> - { >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - .cache_size = 2048, >> - .cache_level = 2, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 12, >> - }, >> -}; >> - >> -static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache per SQC */ >> - .cache_size = 32, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* Scalar L1 Data Cache per SQC */ >> - .cache_size = 16, >> - 
.cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* GL1 Data Cache per SA */ >> - .cache_size = 128, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 10, >> - }, >> - { >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - .cache_size = 4096, >> - .cache_level = 2, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 10, >> - }, >> - { >> - /* L3 Data Cache per GPU */ >> - .cache_size = 128*1024, >> - .cache_level = 3, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 10, >> - }, >> -}; >> - >> -static struct kfd_gpu_cache_info navy_flounder_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache per SQC */ >> - .cache_size = 32, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* Scalar L1 Data Cache per SQC */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* GL1 Data Cache per SA */ >> - .cache_size = 128, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 10, >> - }, >> - { >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - .cache_size = 3072, >> - .cache_level = 2, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 10, >> - }, >> - { >> - /* L3 Data Cache per GPU */ >> - .cache_size = 96*1024, >> - .cache_level = 3, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 10, >> - }, >> -}; >> - >> -static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache per SQC */ >> - .cache_size = 32, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* Scalar L1 Data Cache per SQC */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* GL1 Data Cache per SA */ >> - .cache_size = 128, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 8, >> - }, >> - { >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - .cache_size = 2048, >> - .cache_level = 2, >> - .flags = 
(CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 8, >> - }, >> - { >> - /* L3 Data Cache per GPU */ >> - .cache_size = 32*1024, >> - .cache_level = 3, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 8, >> - }, >> -}; >> - >> -static struct kfd_gpu_cache_info beige_goby_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache per SQC */ >> - .cache_size = 32, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* Scalar L1 Data Cache per SQC */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* GL1 Data Cache per SA */ >> - .cache_size = 128, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 8, >> - }, >> - { >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - .cache_size = 1024, >> - .cache_level = 2, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 8, >> - }, >> - { >> - /* L3 Data Cache per GPU */ >> - .cache_size = 16*1024, >> - .cache_level = 3, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 8, >> - }, >> -}; >> - >> -static struct kfd_gpu_cache_info yellow_carp_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache per SQC */ >> - .cache_size = 32, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* Scalar L1 Data Cache per SQC */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* GL1 Data Cache per SA */ >> - .cache_size = 128, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 6, >> - }, >> - { >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - .cache_size = 2048, >> - .cache_level = 2, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 6, >> - }, >> -}; >> - >> -static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = { >> - { >> - /* TCP L1 Cache per CU */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 1, >> - }, >> - { >> - /* Scalar L1 Instruction Cache per SQC */ >> - .cache_size = 32, >> - .cache_level = 1, 
>> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* Scalar L1 Data Cache per SQC */ >> - .cache_size = 16, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* GL1 Data Cache per SA */ >> - .cache_size = 128, >> - .cache_level = 1, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> - { >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - .cache_size = 256, >> - .cache_level = 2, >> - .flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE), >> - .num_cu_shared = 2, >> - }, >> -}; >> - >> static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, >> struct crat_subtype_computeunit *cu) >> { >> @@ -1223,419 +430,6 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, >> return ret; >> } >> >> -/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ >> -static int fill_in_l1_pcache(struct crat_subtype_cache *pcache, >> - struct kfd_gpu_cache_info *pcache_info, >> - struct kfd_cu_info *cu_info, >> - int mem_available, >> - int cu_bitmask, >> - int cache_type, unsigned int cu_processor_id, >> - int cu_block) >> -{ >> - unsigned int cu_sibling_map_mask; >> - int first_active_cu; >> - >> - /* First check if enough memory is available */ >> - if (sizeof(struct crat_subtype_cache) > mem_available) >> - return -ENOMEM; >> - >> - cu_sibling_map_mask = cu_bitmask; >> - cu_sibling_map_mask >>= cu_block; >> - cu_sibling_map_mask &= >> - ((1 << pcache_info[cache_type].num_cu_shared) - 1); >> - first_active_cu = ffs(cu_sibling_map_mask); >> - >> - /* CU could be inactive. In case of shared cache find the first active >> - * CU. and incase of non-shared cache check if the CU is inactive. If >> - * inactive active skip it >> - */ >> - if (first_active_cu) { >> - memset(pcache, 0, sizeof(struct crat_subtype_cache)); >> - pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; >> - pcache->length = sizeof(struct crat_subtype_cache); >> - pcache->flags = pcache_info[cache_type].flags; >> - pcache->processor_id_low = cu_processor_id >> - + (first_active_cu - 1); >> - pcache->cache_level = pcache_info[cache_type].cache_level; >> - pcache->cache_size = pcache_info[cache_type].cache_size; >> - >> - /* Sibling map is w.r.t processor_id_low, so shift out >> - * inactive CU >> - */ >> - cu_sibling_map_mask = >> - cu_sibling_map_mask >> (first_active_cu - 1); >> - >> - pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); >> - pcache->sibling_map[1] = >> - (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); >> - pcache->sibling_map[2] = >> - (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); >> - pcache->sibling_map[3] = >> - (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); >> - return 0; >> - } >> - return 1; >> -} >> - >> -/* Helper function. 
See kfd_fill_gpu_cache_info for parameter description */ >> -static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache, >> - struct kfd_gpu_cache_info *pcache_info, >> - struct kfd_cu_info *cu_info, >> - int mem_available, >> - int cache_type, unsigned int cu_processor_id) >> -{ >> - unsigned int cu_sibling_map_mask; >> - int first_active_cu; >> - int i, j, k; >> - >> - /* First check if enough memory is available */ >> - if (sizeof(struct crat_subtype_cache) > mem_available) >> - return -ENOMEM; >> - >> - cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; >> - cu_sibling_map_mask &= >> - ((1 << pcache_info[cache_type].num_cu_shared) - 1); >> - first_active_cu = ffs(cu_sibling_map_mask); >> - >> - /* CU could be inactive. In case of shared cache find the first active >> - * CU. and incase of non-shared cache check if the CU is inactive. If >> - * inactive active skip it >> - */ >> - if (first_active_cu) { >> - memset(pcache, 0, sizeof(struct crat_subtype_cache)); >> - pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; >> - pcache->length = sizeof(struct crat_subtype_cache); >> - pcache->flags = pcache_info[cache_type].flags; >> - pcache->processor_id_low = cu_processor_id >> - + (first_active_cu - 1); >> - pcache->cache_level = pcache_info[cache_type].cache_level; >> - pcache->cache_size = pcache_info[cache_type].cache_size; >> - >> - /* Sibling map is w.r.t processor_id_low, so shift out >> - * inactive CU >> - */ >> - cu_sibling_map_mask = >> - cu_sibling_map_mask >> (first_active_cu - 1); >> - k = 0; >> - for (i = 0; i < cu_info->num_shader_engines; i++) { >> - for (j = 0; j < cu_info->num_shader_arrays_per_engine; >> - j++) { >> - pcache->sibling_map[k] = >> - (uint8_t)(cu_sibling_map_mask & 0xFF); >> - pcache->sibling_map[k+1] = >> - (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); >> - pcache->sibling_map[k+2] = >> - (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); >> - pcache->sibling_map[k+3] = >> - (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); >> - k += 4; >> - cu_sibling_map_mask = >> - cu_info->cu_bitmap[i % 4][j + i / 4]; >> - cu_sibling_map_mask &= ( >> - (1 << pcache_info[cache_type].num_cu_shared) >> - - 1); >> - } >> - } >> - return 0; >> - } >> - return 1; >> -} >> - >> -#define KFD_MAX_CACHE_TYPES 6 >> - >> -static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, >> - struct kfd_gpu_cache_info *pcache_info) >> -{ >> - struct amdgpu_device *adev = kdev->adev; >> - int i = 0; >> - >> - /* TCP L1 Cache per CU */ >> - if (adev->gfx.config.gc_tcp_l1_size) { >> - pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size; >> - pcache_info[i].cache_level = 1; >> - pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE); >> - pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; >> - i++; >> - } >> - /* Scalar L1 Instruction Cache per SQC */ >> - if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) { >> - pcache_info[i].cache_size = >> - adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; >> - pcache_info[i].cache_level = 1; >> - pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_INST_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE); >> - pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; >> - i++; >> - } >> - /* Scalar L1 Data Cache per SQC */ >> - if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { >> - pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; >> - pcache_info[i].cache_level = 1; >> - pcache_info[i].flags 
= (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE); >> - pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; >> - i++; >> - } >> - /* GL1 Data Cache per SA */ >> - if (adev->gfx.config.gc_gl1c_per_sa && >> - adev->gfx.config.gc_gl1c_size_per_instance) { >> - pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa * >> - adev->gfx.config.gc_gl1c_size_per_instance; >> - pcache_info[i].cache_level = 1; >> - pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE); >> - pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; >> - i++; >> - } >> - /* L2 Data Cache per GPU (Total Tex Cache) */ >> - if (adev->gfx.config.gc_gl2c_per_gpu) { >> - pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu; >> - pcache_info[i].cache_level = 2; >> - pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE); >> - pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; >> - i++; >> - } >> - /* L3 Data Cache per GPU */ >> - if (adev->gmc.mall_size) { >> - pcache_info[i].cache_size = adev->gmc.mall_size / 1024; >> - pcache_info[i].cache_level = 3; >> - pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | >> - CRAT_CACHE_FLAGS_DATA_CACHE | >> - CRAT_CACHE_FLAGS_SIMD_CACHE); >> - pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; >> - i++; >> - } >> - return i; >> -} >> - >> -/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info >> - * tables >> - * >> - * @kdev - [IN] GPU device >> - * @gpu_processor_id - [IN] GPU processor ID to which these caches >> - * associate >> - * @available_size - [IN] Amount of memory available in pcache >> - * @cu_info - [IN] Compute Unit info obtained from KGD >> - * @pcache - [OUT] memory into which cache data is to be filled in. >> - * @size_filled - [OUT] amount of data used up in pcache. 
>> - * @num_of_entries - [OUT] number of caches added >> - */ >> -static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, >> - int gpu_processor_id, >> - int available_size, >> - struct kfd_cu_info *cu_info, >> - struct crat_subtype_cache *pcache, >> - int *size_filled, >> - int *num_of_entries) >> -{ >> - struct kfd_gpu_cache_info *pcache_info; >> - struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; >> - int num_of_cache_types = 0; >> - int i, j, k; >> - int ct = 0; >> - int mem_available = available_size; >> - unsigned int cu_processor_id; >> - int ret; >> - unsigned int num_cu_shared; >> - >> - switch (kdev->adev->asic_type) { >> - case CHIP_KAVERI: >> - pcache_info = kaveri_cache_info; >> - num_of_cache_types = ARRAY_SIZE(kaveri_cache_info); >> - break; >> - case CHIP_HAWAII: >> - pcache_info = hawaii_cache_info; >> - num_of_cache_types = ARRAY_SIZE(hawaii_cache_info); >> - break; >> - case CHIP_CARRIZO: >> - pcache_info = carrizo_cache_info; >> - num_of_cache_types = ARRAY_SIZE(carrizo_cache_info); >> - break; >> - case CHIP_TONGA: >> - pcache_info = tonga_cache_info; >> - num_of_cache_types = ARRAY_SIZE(tonga_cache_info); >> - break; >> - case CHIP_FIJI: >> - pcache_info = fiji_cache_info; >> - num_of_cache_types = ARRAY_SIZE(fiji_cache_info); >> - break; >> - case CHIP_POLARIS10: >> - pcache_info = polaris10_cache_info; >> - num_of_cache_types = ARRAY_SIZE(polaris10_cache_info); >> - break; >> - case CHIP_POLARIS11: >> - pcache_info = polaris11_cache_info; >> - num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); >> - break; >> - case CHIP_POLARIS12: >> - pcache_info = polaris12_cache_info; >> - num_of_cache_types = ARRAY_SIZE(polaris12_cache_info); >> - break; >> - case CHIP_VEGAM: >> - pcache_info = vegam_cache_info; >> - num_of_cache_types = ARRAY_SIZE(vegam_cache_info); >> - break; >> - default: >> - switch (KFD_GC_VERSION(kdev)) { >> - case IP_VERSION(9, 0, 1): >> - pcache_info = vega10_cache_info; >> - num_of_cache_types = ARRAY_SIZE(vega10_cache_info); >> - break; >> - case IP_VERSION(9, 2, 1): >> - pcache_info = vega12_cache_info; >> - num_of_cache_types = ARRAY_SIZE(vega12_cache_info); >> - break; >> - case IP_VERSION(9, 4, 0): >> - case IP_VERSION(9, 4, 1): >> - pcache_info = vega20_cache_info; >> - num_of_cache_types = ARRAY_SIZE(vega20_cache_info); >> - break; >> - case IP_VERSION(9, 4, 2): >> - pcache_info = aldebaran_cache_info; >> - num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); >> - break; >> - case IP_VERSION(9, 1, 0): >> - case IP_VERSION(9, 2, 2): >> - pcache_info = raven_cache_info; >> - num_of_cache_types = ARRAY_SIZE(raven_cache_info); >> - break; >> - case IP_VERSION(9, 3, 0): >> - pcache_info = renoir_cache_info; >> - num_of_cache_types = ARRAY_SIZE(renoir_cache_info); >> - break; >> - case IP_VERSION(10, 1, 10): >> - case IP_VERSION(10, 1, 2): >> - case IP_VERSION(10, 1, 3): >> - case IP_VERSION(10, 1, 4): >> - pcache_info = navi10_cache_info; >> - num_of_cache_types = ARRAY_SIZE(navi10_cache_info); >> - break; >> - case IP_VERSION(10, 1, 1): >> - pcache_info = navi14_cache_info; >> - num_of_cache_types = ARRAY_SIZE(navi14_cache_info); >> - break; >> - case IP_VERSION(10, 3, 0): >> - pcache_info = sienna_cichlid_cache_info; >> - num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info); >> - break; >> - case IP_VERSION(10, 3, 2): >> - pcache_info = navy_flounder_cache_info; >> - num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info); >> - break; >> - case IP_VERSION(10, 3, 4): >> - pcache_info = dimgrey_cavefish_cache_info; >> - 
num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info); >> - break; >> - case IP_VERSION(10, 3, 1): >> - pcache_info = vangogh_cache_info; >> - num_of_cache_types = ARRAY_SIZE(vangogh_cache_info); >> - break; >> - case IP_VERSION(10, 3, 5): >> - pcache_info = beige_goby_cache_info; >> - num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); >> - break; >> - case IP_VERSION(10, 3, 3): >> - case IP_VERSION(10, 3, 7): /* TODO: Double check these on production silicon */ >> - pcache_info = yellow_carp_cache_info; >> - num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); >> - break; >> - case IP_VERSION(10, 3, 6): >> - pcache_info = gc_10_3_6_cache_info; >> - num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info); >> - break; >> - case IP_VERSION(11, 0, 0): >> - case IP_VERSION(11, 0, 1): >> - case IP_VERSION(11, 0, 2): >> - case IP_VERSION(11, 0, 3): >> - pcache_info = cache_info; >> - num_of_cache_types = >> - kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info); >> - break; >> - default: >> - return -EINVAL; >> - } >> - } >> - >> - *size_filled = 0; >> - *num_of_entries = 0; >> - >> - /* For each type of cache listed in the kfd_gpu_cache_info table, >> - * go through all available Compute Units. >> - * The [i,j,k] loop will >> - * if kfd_gpu_cache_info.num_cu_shared = 1 >> - * will parse through all available CU >> - * If (kfd_gpu_cache_info.num_cu_shared != 1) >> - * then it will consider only one CU from >> - * the shared unit >> - */ >> - >> - for (ct = 0; ct < num_of_cache_types; ct++) { >> - cu_processor_id = gpu_processor_id; >> - if (pcache_info[ct].cache_level == 1) { >> - for (i = 0; i < cu_info->num_shader_engines; i++) { >> - for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { >> - for (k = 0; k < cu_info->num_cu_per_sh; >> - k += pcache_info[ct].num_cu_shared) { >> - ret = fill_in_l1_pcache(pcache, >> - pcache_info, >> - cu_info, >> - mem_available, >> - cu_info->cu_bitmap[i % 4][j + i / 4], >> - ct, >> - cu_processor_id, >> - k); >> - >> - if (ret < 0) >> - break; >> - >> - if (!ret) { >> - pcache++; >> - (*num_of_entries)++; >> - mem_available -= sizeof(*pcache); >> - (*size_filled) += sizeof(*pcache); >> - } >> - >> - /* Move to next CU block */ >> - num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= >> - cu_info->num_cu_per_sh) ? >> - pcache_info[ct].num_cu_shared : >> - (cu_info->num_cu_per_sh - k); >> - cu_processor_id += num_cu_shared; >> - } >> - } >> - } >> - } else { >> - ret = fill_in_l2_l3_pcache(pcache, >> - pcache_info, >> - cu_info, >> - mem_available, >> - ct, >> - cu_processor_id); >> - >> - if (ret < 0) >> - break; >> - >> - if (!ret) { >> - pcache++; >> - (*num_of_entries)++; >> - mem_available -= sizeof(*pcache); >> - (*size_filled) += sizeof(*pcache); >> - } >> - } >> - } >> - >> - pr_debug("Added [%d] GPU cache entries\n", *num_of_entries); >> - >> - return 0; >> -} >> - >> static bool kfd_ignore_crat(void) >> { >> bool ret; >> @@ -2203,8 +997,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, >> struct crat_subtype_computeunit *cu; >> struct kfd_cu_info cu_info; >> int avail_size = *size; >> - int num_of_cache_entries = 0; >> - int cache_mem_filled = 0; >> uint32_t nid = 0; >> int ret = 0; >> >> @@ -2304,31 +1096,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, >> crat_table->length += sizeof(struct crat_subtype_memory); >> crat_table->total_entries++; >> >> - /* TODO: Fill in cache information. 
This information is NOT readily >> - * available in KGD >> - */ >> - sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + >> - sub_type_hdr->length); >> - ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low, >> - avail_size, >> - &cu_info, >> - (struct crat_subtype_cache *)sub_type_hdr, >> - &cache_mem_filled, >> - &num_of_cache_entries); >> - >> - if (ret < 0) >> - return ret; >> - >> - crat_table->length += cache_mem_filled; >> - crat_table->total_entries += num_of_cache_entries; >> - avail_size -= cache_mem_filled; >> - >> /* Fill in Subtype: IO_LINKS >> * Only direct links are added here which is Link from GPU to >> * its NUMA node. Indirect links are added by userspace. >> */ >> sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + >> - cache_mem_filled); >> + sub_type_hdr->length); >> ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev, >> (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain); >> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c >> index e0680d265a66..97e88c35be01 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c >> @@ -50,6 +50,747 @@ static struct kfd_system_properties sys_props; >> static DECLARE_RWSEM(topology_lock); >> static uint32_t topology_crat_proximity_domain; >> >> +/* Static table to describe GPU Cache information */ >> +struct kfd_gpu_cache_info { >> + uint32_t cache_size; >> + uint32_t cache_level; >> + uint32_t flags; >> + /* Indicates how many Compute Units share this cache >> + * within a SA. Value = 1 indicates the cache is not shared >> + */ >> + uint32_t num_cu_shared; >> +}; >> + >> +static struct kfd_gpu_cache_info kaveri_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache (in SQC module) per bank */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* Scalar L1 Data Cache (in SQC module) per bank */ >> + .cache_size = 8, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + >> + /* TODO: Add L2 Cache information */ >> +}; >> + >> +static struct kfd_gpu_cache_info carrizo_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache (in SQC module) per bank */ >> + .cache_size = 8, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 4, >> + }, >> + { >> + /* Scalar L1 Data Cache (in SQC module) per bank. 
*/ >> + .cache_size = 4, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 4, >> + }, >> + >> + /* TODO: Add L2 Cache information */ >> +}; >> + >> +#define hawaii_cache_info kaveri_cache_info >> +#define tonga_cache_info carrizo_cache_info >> +#define fiji_cache_info carrizo_cache_info >> +#define polaris10_cache_info carrizo_cache_info >> +#define polaris11_cache_info carrizo_cache_info >> +#define polaris12_cache_info carrizo_cache_info >> +#define vegam_cache_info carrizo_cache_info >> + >> +/* NOTE: L1 cache information has been updated and L2/L3 >> + * cache information has been added for Vega10 and >> + * newer ASICs. The unit for cache_size is KiB. >> + * In future, check & update cache details >> + * for every new ASIC is required. >> + */ >> +static struct kfd_gpu_cache_info vega10_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache per SQC */ >> + .cache_size = 32, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 3, >> + }, >> + { >> + /* Scalar L1 Data Cache per SQC */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 3, >> + }, >> + { >> + /* L2 Data Cache per GPU (Total Tex Cache) */ >> + .cache_size = 4096, >> + .cache_level = 2, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 16, >> + }, >> +}; >> +static struct kfd_gpu_cache_info raven_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache per SQC */ >> + .cache_size = 32, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 3, >> + }, >> + { >> + /* Scalar L1 Data Cache per SQC */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 3, >> + }, >> + { >> + /* L2 Data Cache per GPU (Total Tex Cache) */ >> + .cache_size = 1024, >> + .cache_level = 2, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 11, >> + }, >> +}; >> + >> +static struct kfd_gpu_cache_info renoir_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache per SQC */ >> + .cache_size = 32, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 3, >> + }, >> + { >> + /* Scalar L1 Data Cache per SQC */ >> + .cache_size = 16, >> + 
.cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 3, >> + }, >> + { >> + /* L2 Data Cache per GPU (Total Tex Cache) */ >> + .cache_size = 1024, >> + .cache_level = 2, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 8, >> + }, >> +}; >> + >> +static struct kfd_gpu_cache_info vega12_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache per SQC */ >> + .cache_size = 32, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 3, >> + }, >> + { >> + /* Scalar L1 Data Cache per SQC */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 3, >> + }, >> + { >> + /* L2 Data Cache per GPU (Total Tex Cache) */ >> + .cache_size = 2048, >> + .cache_level = 2, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 5, >> + }, >> +}; >> + >> +static struct kfd_gpu_cache_info vega20_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache per SQC */ >> + .cache_size = 32, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 3, >> + }, >> + { >> + /* Scalar L1 Data Cache per SQC */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 3, >> + }, >> + { >> + /* L2 Data Cache per GPU (Total Tex Cache) */ >> + .cache_size = 8192, >> + .cache_level = 2, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 16, >> + }, >> +}; >> + >> +static struct kfd_gpu_cache_info aldebaran_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache per SQC */ >> + .cache_size = 32, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* Scalar L1 Data Cache per SQC */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* L2 Data Cache per GPU (Total Tex Cache) */ >> + .cache_size = 8192, >> + .cache_level = 2, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 14, >> + }, >> +}; >> + >> +static 
struct kfd_gpu_cache_info navi10_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache per SQC */ >> + .cache_size = 32, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* Scalar L1 Data Cache per SQC */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* GL1 Data Cache per SA */ >> + .cache_size = 128, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 10, >> + }, >> + { >> + /* L2 Data Cache per GPU (Total Tex Cache) */ >> + .cache_size = 4096, >> + .cache_level = 2, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 10, >> + }, >> +}; >> + >> +static struct kfd_gpu_cache_info vangogh_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache per SQC */ >> + .cache_size = 32, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* Scalar L1 Data Cache per SQC */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* GL1 Data Cache per SA */ >> + .cache_size = 128, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 8, >> + }, >> + { >> + /* L2 Data Cache per GPU (Total Tex Cache) */ >> + .cache_size = 1024, >> + .cache_level = 2, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 8, >> + }, >> +}; >> + >> +static struct kfd_gpu_cache_info navi14_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache per SQC */ >> + .cache_size = 32, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* Scalar L1 Data Cache per SQC */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* GL1 Data Cache per SA */ >> + .cache_size = 128, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 12, >> + }, >> + { >> + /* L2 
Data Cache per GPU (Total Tex Cache) */ >> + .cache_size = 2048, >> + .cache_level = 2, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 12, >> + }, >> +}; >> + >> +static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache per SQC */ >> + .cache_size = 32, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* Scalar L1 Data Cache per SQC */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* GL1 Data Cache per SA */ >> + .cache_size = 128, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 10, >> + }, >> + { >> + /* L2 Data Cache per GPU (Total Tex Cache) */ >> + .cache_size = 4096, >> + .cache_level = 2, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 10, >> + }, >> + { >> + /* L3 Data Cache per GPU */ >> + .cache_size = 128*1024, >> + .cache_level = 3, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 10, >> + }, >> +}; >> + >> +static struct kfd_gpu_cache_info navy_flounder_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache per SQC */ >> + .cache_size = 32, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* Scalar L1 Data Cache per SQC */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* GL1 Data Cache per SA */ >> + .cache_size = 128, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 10, >> + }, >> + { >> + /* L2 Data Cache per GPU (Total Tex Cache) */ >> + .cache_size = 3072, >> + .cache_level = 2, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 10, >> + }, >> + { >> + /* L3 Data Cache per GPU */ >> + .cache_size = 96*1024, >> + .cache_level = 3, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 10, >> + }, >> +}; >> + >> +static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + 
.num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache per SQC */ >> + .cache_size = 32, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* Scalar L1 Data Cache per SQC */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* GL1 Data Cache per SA */ >> + .cache_size = 128, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 8, >> + }, >> + { >> + /* L2 Data Cache per GPU (Total Tex Cache) */ >> + .cache_size = 2048, >> + .cache_level = 2, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 8, >> + }, >> + { >> + /* L3 Data Cache per GPU */ >> + .cache_size = 32*1024, >> + .cache_level = 3, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 8, >> + }, >> +}; >> + >> +static struct kfd_gpu_cache_info beige_goby_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache per SQC */ >> + .cache_size = 32, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* Scalar L1 Data Cache per SQC */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* GL1 Data Cache per SA */ >> + .cache_size = 128, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 8, >> + }, >> + { >> + /* L2 Data Cache per GPU (Total Tex Cache) */ >> + .cache_size = 1024, >> + .cache_level = 2, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 8, >> + }, >> + { >> + /* L3 Data Cache per GPU */ >> + .cache_size = 16*1024, >> + .cache_level = 3, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 8, >> + }, >> +}; >> +static struct kfd_gpu_cache_info yellow_carp_cache_info[] = { >> + { >> + /* TCP L1 Cache per CU */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 1, >> + }, >> + { >> + /* Scalar L1 Instruction Cache per SQC */ >> + .cache_size = 32, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* Scalar L1 Data Cache per SQC */ >> + .cache_size = 16, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 2, >> + }, >> + { >> + /* GL1 Data Cache 
per SA */ >> + .cache_size = 128, >> + .cache_level = 1, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 6, >> + }, >> + { >> + /* L2 Data Cache per GPU (Total Tex Cache) */ >> + .cache_size = 2048, >> + .cache_level = 2, >> + .flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE), >> + .num_cu_shared = 6, >> + }, >> +}; >> + >> struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock( >> uint32_t proximity_domain) >> { >> @@ -149,6 +890,7 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) >> { >> struct kfd_mem_properties *mem; >> struct kfd_cache_properties *cache; >> + struct kfd_cache_properties_ext *cache_ext; >> struct kfd_iolink_properties *iolink; >> struct kfd_iolink_properties *p2plink; >> #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED >> @@ -171,6 +913,13 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) >> kfree(cache); >> } >> >> + while (dev->cache_props_ext.next != &dev->cache_props_ext) { >> + cache_ext = container_of(dev->cache_props_ext.next, >> + struct kfd_cache_properties_ext, list); >> + list_del(&cache_ext->list); >> + kfree(cache_ext); >> + } >> + >> while (dev->io_link_props.next != &dev->io_link_props) { >> iolink = container_of(dev->io_link_props.next, >> struct kfd_iolink_properties, list); >> @@ -227,6 +976,7 @@ struct kfd_topology_device *kfd_create_topology_device( >> >> INIT_LIST_HEAD(&dev->mem_props); >> INIT_LIST_HEAD(&dev->cache_props); >> + INIT_LIST_HEAD(&dev->cache_props_ext); >> INIT_LIST_HEAD(&dev->io_link_props); >> INIT_LIST_HEAD(&dev->p2p_link_props); >> #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED >> @@ -387,7 +1137,6 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, >> >> /* Making sure that the buffer is an empty string */ >> buffer[0] = 0; >> - >> cache = container_of(attr, struct kfd_cache_properties, attr); >> if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) >> return -EPERM; >> @@ -423,6 +1172,50 @@ static struct kobj_type cache_type = { >> .sysfs_ops = &cache_ops, >> }; >> >> +static ssize_t kfd_cache_ext_show(struct kobject *kobj, struct attribute *attr, >> + char *buffer) >> +{ >> + int offs = 0; >> + uint32_t i, j; >> + struct kfd_cache_properties_ext *cache; >> + >> + /* Making sure that the buffer is an empty string */ >> + buffer[0] = 0; >> + cache = container_of(attr, struct kfd_cache_properties_ext, attr); >> + if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) >> + return -EPERM; >> + sysfs_show_32bit_prop(buffer, offs, "processor_id_low", >> + cache->processor_id_low); >> + sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level); >> + sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size); >> + sysfs_show_32bit_prop(buffer, offs, "cache_line_size", >> + cache->cacheline_size); >> + sysfs_show_32bit_prop(buffer, offs, "cache_lines_per_tag", >> + cache->cachelines_per_tag); >> + sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc); >> + sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency); >> + sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type); >> + offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map "); >> + for (i = 0; i < VCRAT_SIBLINGMAP_SIZE; i++) >> + for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) >> + /* Check each bit */ >> + offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,", >> + (cache->sibling_map[i] >> j) &
1); >> + >> + /* Replace the last "," with end of line */ >> + buffer[offs-1] = '\n'; >> + return offs; >> +} >> + >> +static const struct sysfs_ops cache_ext_ops = { >> + .show = kfd_cache_ext_show, >> +}; >> + >> +static struct kobj_type cache_ext_type = { >> + .release = kfd_topology_kobj_release, >> + .sysfs_ops = &cache_ext_ops, >> +}; >> + >> #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED >> /****** Sysfs of Performance Counters ******/ >> >> @@ -610,6 +1403,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) >> struct kfd_iolink_properties *p2plink; >> struct kfd_iolink_properties *iolink; >> struct kfd_cache_properties *cache; >> + struct kfd_cache_properties_ext *cache_ext; >> struct kfd_mem_properties *mem; >> #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED >> struct kfd_perf_properties *perf; >> @@ -663,6 +1457,18 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) >> dev->kobj_cache = NULL; >> } >> >> + if (dev->kobj_cache_ext) { >> + list_for_each_entry(cache_ext, &dev->cache_props_ext, list) >> + if (cache_ext->kobj) { >> + kfd_remove_sysfs_file(cache_ext->kobj, >> + &cache_ext->attr); >> + cache_ext->kobj = NULL; >> + } >> + kobject_del(dev->kobj_cache_ext); >> + kobject_put(dev->kobj_cache_ext); >> + dev->kobj_cache_ext = NULL; >> + } >> + >> if (dev->kobj_mem) { >> list_for_each_entry(mem, &dev->mem_props, list) >> if (mem->kobj) { >> @@ -707,6 +1513,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, >> struct kfd_iolink_properties *p2plink; >> struct kfd_iolink_properties *iolink; >> struct kfd_cache_properties *cache; >> + struct kfd_cache_properties_ext *cache_ext; >> struct kfd_mem_properties *mem; >> #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED >> struct kfd_perf_properties *perf; >> @@ -741,6 +1548,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, >> if (!dev->kobj_cache) >> return -ENOMEM; >> >> + dev->kobj_cache_ext = kobject_create_and_add("caches_ext", dev->kobj_node); >> + if (!dev->kobj_cache_ext) >> + return -ENOMEM; >> + >> dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node); >> if (!dev->kobj_iolink) >> return -ENOMEM; >> @@ -830,6 +1641,28 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, >> i++; >> } >> >> + i = 0; >> + list_for_each_entry(cache_ext, &dev->cache_props_ext, list) { >> + cache_ext->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); >> + if (!cache_ext->kobj) >> + return -ENOMEM; >> + ret = kobject_init_and_add(cache_ext->kobj, &cache_ext_type, >> + dev->kobj_cache_ext, "%d", i); >> + if (ret < 0) { >> + kobject_put(cache_ext->kobj); >> + return ret; >> + } >> + >> + cache_ext->attr.name = "properties"; >> + cache_ext->attr.mode = KFD_SYSFS_FILE_MODE; >> + sysfs_attr_init(&cache_ext->attr); >> + ret = sysfs_create_file(cache_ext->kobj, &cache_ext->attr); >> + if (ret < 0) >> + return ret; >> + i++; >> + } >> + >> + >> i = 0; >> list_for_each_entry(iolink, &dev->io_link_props, list) { >> iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); >> @@ -1268,6 +2101,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) >> struct kfd_topology_device *out_dev = NULL; >> struct kfd_mem_properties *mem; >> struct kfd_cache_properties *cache; >> + struct kfd_cache_properties_ext *cache_ext; >> struct kfd_iolink_properties *iolink; >> struct kfd_iolink_properties *p2plink; >> >> @@ -1288,6 +2122,8 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) >> mem->gpu = dev->gpu; >> 
list_for_each_entry(cache, &dev->cache_props, list) >> cache->gpu = dev->gpu; >> + list_for_each_entry(cache_ext, &dev->cache_props_ext, list) >> + cache_ext->gpu = dev->gpu; >> list_for_each_entry(iolink, &dev->io_link_props, list) >> iolink->gpu = dev->gpu; >> list_for_each_entry(p2plink, &dev->p2p_link_props, list) >> @@ -1721,6 +2557,397 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) >> dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED; >> } >> >> +/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ >> +static int fill_in_l1_pcache(struct kfd_cache_properties_ext **props_ext, >> + struct kfd_gpu_cache_info *pcache_info, >> + struct kfd_cu_info *cu_info, >> + int cu_bitmask, >> + int cache_type, unsigned int cu_processor_id, >> + int cu_block) >> +{ >> + unsigned int cu_sibling_map_mask; >> + int first_active_cu; >> + struct kfd_cache_properties_ext *pcache = NULL; >> + >> + cu_sibling_map_mask = cu_bitmask; >> + cu_sibling_map_mask >>= cu_block; >> + cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); >> + first_active_cu = ffs(cu_sibling_map_mask); >> + >> + /* CU could be inactive. In case of shared cache find the first active >> + * CU. In case of non-shared cache check if the CU is inactive. If >> + * inactive, skip it >> + */ >> + if (first_active_cu) { >> + pcache = kfd_alloc_struct(pcache); >> + if (!pcache) >> + return -ENOMEM; >> + >> + memset(pcache, 0, sizeof(struct kfd_cache_properties_ext)); >> + pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); >> + pcache->cache_level = pcache_info[cache_type].cache_level; >> + pcache->cache_size = pcache_info[cache_type].cache_size; >> + >> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) >> + pcache->cache_type |= HSA_CACHE_TYPE_DATA; >> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) >> + pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; >> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) >> + pcache->cache_type |= HSA_CACHE_TYPE_CPU; >> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) >> + pcache->cache_type |= HSA_CACHE_TYPE_HSACU; >> + >> + /* Sibling map is w.r.t processor_id_low, so shift out >> + * inactive CU >> + */ >> + cu_sibling_map_mask = >> + cu_sibling_map_mask >> (first_active_cu - 1); >> + >> + pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); >> + pcache->sibling_map[1] = >> + (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); >> + pcache->sibling_map[2] = >> + (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); >> + pcache->sibling_map[3] = >> + (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); >> + >> + *props_ext = pcache; >> + >> + return 0; >> + } >> + return 1; >> +} >> + >> +/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ >> +static int fill_in_l2_l3_pcache(struct kfd_cache_properties_ext **props_ext, >> + struct kfd_gpu_cache_info *pcache_info, >> + struct kfd_cu_info *cu_info, >> + int cache_type, unsigned int cu_processor_id) >> +{ >> + unsigned int cu_sibling_map_mask; >> + int first_active_cu; >> + int i, j, k; >> + struct kfd_cache_properties_ext *pcache = NULL; >> + >> + cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; >> + cu_sibling_map_mask &= >> + ((1 << pcache_info[cache_type].num_cu_shared) - 1); >> + first_active_cu = ffs(cu_sibling_map_mask); >> + >> + /* CU could be inactive. In case of shared cache find the first active >> + * CU.
In case of non-shared cache check if the CU is inactive. If >> + * inactive, skip it >> + */ >> + if (first_active_cu) { >> + pcache = kfd_alloc_struct(pcache); >> + if (!pcache) >> + return -ENOMEM; >> + >> + memset(pcache, 0, sizeof(struct kfd_cache_properties_ext)); >> + pcache->processor_id_low = cu_processor_id >> + + (first_active_cu - 1); >> + pcache->cache_level = pcache_info[cache_type].cache_level; >> + pcache->cache_size = pcache_info[cache_type].cache_size; >> + >> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) >> + pcache->cache_type |= HSA_CACHE_TYPE_DATA; >> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) >> + pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; >> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) >> + pcache->cache_type |= HSA_CACHE_TYPE_CPU; >> + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) >> + pcache->cache_type |= HSA_CACHE_TYPE_HSACU; >> + >> + /* Sibling map is w.r.t processor_id_low, so shift out >> + * inactive CU >> + */ >> + cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); >> + k = 0; >> + >> + for (i = 0; i < cu_info->num_shader_engines; i++) { >> + for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { >> + pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); >> + pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); >> + pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); >> + pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); >> + k += 4; >> + >> + cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4]; >> + cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); >> + } >> + } >> + *props_ext = pcache; >> + return 0; >> + } >> + return 1; >> +} >> + >> +#define KFD_MAX_CACHE_TYPES 6 >> + >> +static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, >> + struct kfd_gpu_cache_info *pcache_info) >> +{ >> + struct amdgpu_device *adev = kdev->adev; >> + int i = 0; >> + >> + /* TCP L1 Cache per CU */ >> + if (adev->gfx.config.gc_tcp_l1_size) { >> + pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size; >> + pcache_info[i].cache_level = 1; >> + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE); >> + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; >> + i++; >> + } >> + /* Scalar L1 Instruction Cache per SQC */ >> + if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) { >> + pcache_info[i].cache_size = >> + adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; >> + pcache_info[i].cache_level = 1; >> + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_INST_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE); >> + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; >> + i++; >> + } >> + /* Scalar L1 Data Cache per SQC */ >> + if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { >> + pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; >> + pcache_info[i].cache_level = 1; >> + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE); >> + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; >> + i++; >> + } >> + /* GL1 Data Cache per SA */ >> + if (adev->gfx.config.gc_gl1c_per_sa && >> + adev->gfx.config.gc_gl1c_size_per_instance) { >> + pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
>> + adev->gfx.config.gc_gl1c_size_per_instance; >> + pcache_info[i].cache_level = 1; >> + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE); >> + pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; >> + i++; >> + } >> + /* L2 Data Cache per GPU (Total Tex Cache) */ >> + if (adev->gfx.config.gc_gl2c_per_gpu) { >> + pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu; >> + pcache_info[i].cache_level = 2; >> + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE); >> + pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; >> + i++; >> + } >> + /* L3 Data Cache per GPU */ >> + if (adev->gmc.mall_size) { >> + pcache_info[i].cache_size = adev->gmc.mall_size / 1024; >> + pcache_info[i].cache_level = 3; >> + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | >> + CRAT_CACHE_FLAGS_DATA_CACHE | >> + CRAT_CACHE_FLAGS_SIMD_CACHE); >> + pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; >> + i++; >> + } >> + return i; >> +} >> +/* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info >> + * tables >> + */ >> +static int kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_dev *kdev) >> +{ >> + struct kfd_gpu_cache_info *pcache_info = NULL; >> + struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; >> + int num_of_cache_types = 0; >> + int i, j, k; >> + int ct = 0; >> + unsigned int cu_processor_id; >> + int ret; >> + unsigned int num_cu_shared; >> + struct kfd_cu_info cu_info; >> + struct kfd_cu_info *pcu_info; >> + int gpu_processor_id; >> + struct kfd_cache_properties_ext *props_ext; >> + int num_of_entries = 0; >> + >> + amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info); >> + pcu_info = &cu_info; >> + >> + gpu_processor_id = kdev->processor_id_low; >> + >> + switch (kdev->adev->asic_type) { >> + case CHIP_KAVERI: >> + pcache_info = kaveri_cache_info; >> + num_of_cache_types = ARRAY_SIZE(kaveri_cache_info); >> + break; >> + case CHIP_HAWAII: >> + pcache_info = hawaii_cache_info; >> + num_of_cache_types = ARRAY_SIZE(hawaii_cache_info); >> + break; >> + case CHIP_CARRIZO: >> + pcache_info = carrizo_cache_info; >> + num_of_cache_types = ARRAY_SIZE(carrizo_cache_info); >> + break; >> + case CHIP_TONGA: >> + pcache_info = tonga_cache_info; >> + num_of_cache_types = ARRAY_SIZE(tonga_cache_info); >> + break; >> + case CHIP_FIJI: >> + pcache_info = fiji_cache_info; >> + num_of_cache_types = ARRAY_SIZE(fiji_cache_info); >> + break; >> + case CHIP_POLARIS10: >> + pcache_info = polaris10_cache_info; >> + num_of_cache_types = ARRAY_SIZE(polaris10_cache_info); >> + break; >> + case CHIP_POLARIS11: >> + pcache_info = polaris11_cache_info; >> + num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); >> + break; >> + case CHIP_POLARIS12: >> + pcache_info = polaris12_cache_info; >> + num_of_cache_types = ARRAY_SIZE(polaris12_cache_info); >> + break; >> + case CHIP_VEGAM: >> + pcache_info = vegam_cache_info; >> + num_of_cache_types = ARRAY_SIZE(vegam_cache_info); >> + break; >> + default: >> + switch (KFD_GC_VERSION(kdev)) { >> + case IP_VERSION(9, 0, 1): >> + pcache_info = vega10_cache_info; >> + num_of_cache_types = ARRAY_SIZE(vega10_cache_info); >> + break; >> + case IP_VERSION(9, 2, 1): >> + pcache_info = vega12_cache_info; >> + num_of_cache_types = ARRAY_SIZE(vega12_cache_info); >> + break; >> + case IP_VERSION(9, 4, 0): >> + case IP_VERSION(9, 4, 1): >> + pcache_info = vega20_cache_info; >> + 
num_of_cache_types = ARRAY_SIZE(vega20_cache_info); >> + break; >> + case IP_VERSION(9, 4, 2): >> + pcache_info = aldebaran_cache_info; >> + num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); >> + break; >> + case IP_VERSION(9, 1, 0): >> + case IP_VERSION(9, 2, 2): >> + pcache_info = raven_cache_info; >> + num_of_cache_types = ARRAY_SIZE(raven_cache_info); >> + break; >> + case IP_VERSION(9, 3, 0): >> + pcache_info = renoir_cache_info; >> + num_of_cache_types = ARRAY_SIZE(renoir_cache_info); >> + break; >> + case IP_VERSION(10, 1, 10): >> + case IP_VERSION(10, 1, 2): >> + case IP_VERSION(10, 1, 3): >> + case IP_VERSION(10, 1, 4): >> + pcache_info = navi10_cache_info; >> + num_of_cache_types = ARRAY_SIZE(navi10_cache_info); >> + break; >> + case IP_VERSION(10, 1, 1): >> + pcache_info = navi14_cache_info; >> + num_of_cache_types = ARRAY_SIZE(navi14_cache_info); >> + break; >> + case IP_VERSION(10, 3, 0): >> + pcache_info = sienna_cichlid_cache_info; >> + num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info); >> + break; >> + case IP_VERSION(10, 3, 2): >> + pcache_info = navy_flounder_cache_info; >> + num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info); >> + break; >> + case IP_VERSION(10, 3, 4): >> + pcache_info = dimgrey_cavefish_cache_info; >> + num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info); >> + break; >> + case IP_VERSION(10, 3, 1): >> + pcache_info = vangogh_cache_info; >> + num_of_cache_types = ARRAY_SIZE(vangogh_cache_info); >> + break; >> + case IP_VERSION(10, 3, 5): >> + pcache_info = beige_goby_cache_info; >> + num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); >> + break; >> + case IP_VERSION(10, 3, 3): >> + case IP_VERSION(10, 3, 6): /* TODO: Double check these on production silicon */ >> + case IP_VERSION(10, 3, 7): /* TODO: Double check these on production silicon */ >> + pcache_info = yellow_carp_cache_info; >> + num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); >> + break; >> + case IP_VERSION(11, 0, 0): >> + case IP_VERSION(11, 0, 1): >> + case IP_VERSION(11, 0, 2): >> + case IP_VERSION(11, 0, 3): >> + pcache_info = cache_info; >> + num_of_cache_types = >> + kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info); >> + break; >> + default: >> + return -EINVAL; >> + } >> + } >> + >> + /* For each type of cache listed in the kfd_gpu_cache_info table, >> + * go through all available Compute Units. >> + * The [i,j,k] loop will >> + * if kfd_gpu_cache_info.num_cu_shared = 1 >> + * will parse through all available CU >> + * If (kfd_gpu_cache_info.num_cu_shared != 1) >> + * then it will consider only one CU from >> + * the shared unit >> + */ >> + for (ct = 0; ct < num_of_cache_types; ct++) { >> + cu_processor_id = gpu_processor_id; >> + if (pcache_info[ct].cache_level == 1) { >> + for (i = 0; i < pcu_info->num_shader_engines; i++) { >> + for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) { >> + for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { >> + >> + ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, >> + pcu_info->cu_bitmap[i % 4][j + i / 4], ct, >> + cu_processor_id, k); >> + >> + if (ret < 0) >> + break; >> + >> + if (!ret) { >> + num_of_entries++; >> + list_add_tail(&props_ext->list, &dev->cache_props_ext); >> + } >> + >> + /* Move to next CU block */ >> + num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= >> + pcu_info->num_cu_per_sh) ? 
>> + pcache_info[ct].num_cu_shared : >> + (pcu_info->num_cu_per_sh - k); >> + cu_processor_id += num_cu_shared; >> + } >> + } >> + } >> + } else { >> + ret = fill_in_l2_l3_pcache(&props_ext, pcache_info, >> + pcu_info, ct, cu_processor_id); >> + >> + if (ret < 0) >> + break; >> + >> + if (!ret) { >> + num_of_entries++; >> + list_add_tail(&props_ext->list, &dev->cache_props_ext); >> + } >> + } >> + } >> + pr_debug("Added [%d] GPU cache entries\n", num_of_entries); >> + return 0; >> +} >> + >> int kfd_topology_add_device(struct kfd_dev *gpu) >> { >> uint32_t gpu_id; >> @@ -1759,6 +2986,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) >> topology_crat_proximity_domain--; >> return res; >> } >> + >> res = kfd_parse_crat_table(crat_image, >> &temp_topology_device_list, >> proximity_domain); >> @@ -1771,23 +2999,27 @@ int kfd_topology_add_device(struct kfd_dev *gpu) >> >> kfd_topology_update_device_list(&temp_topology_device_list, >> &topology_device_list); >> + up_write(&topology_lock); >> + >> + dev = kfd_assign_gpu(gpu); >> + if (WARN_ON(!dev)) { >> + res = -ENODEV; >> + goto err; >> + } >> + >> + down_write(&topology_lock); >> + kfd_fill_cache_non_crat_info(dev, gpu); >> >> /* Update the SYSFS tree, since we added another topology >> * device >> */ >> res = kfd_topology_update_sysfs(); >> up_write(&topology_lock); >> - >> if (!res) >> sys_props.generation_count++; >> else >> pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n", >> gpu_id, res); >> - dev = kfd_assign_gpu(gpu); >> - if (WARN_ON(!dev)) { >> - res = -ENODEV; >> - goto err; >> - } >> } >> >> dev->gpu_id = gpu_id; >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h >> index dc4e239c8f8f..fc35fe9fa914 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h >> @@ -103,6 +103,25 @@ struct kfd_cache_properties { >> struct attribute attr; >> }; >> >> +#define VCRAT_SIBLINGMAP_SIZE 64 >> + >> +/* for GPUs with more CUs */ >> +struct kfd_cache_properties_ext { >> + struct list_head list; >> + uint32_t processor_id_low; >> + uint32_t cache_level; >> + uint32_t cache_size; >> + uint32_t cacheline_size; >> + uint32_t cachelines_per_tag; >> + uint32_t cache_assoc; >> + uint32_t cache_latency; >> + uint32_t cache_type; >> + uint8_t sibling_map[VCRAT_SIBLINGMAP_SIZE]; >> + struct kfd_dev *gpu; >> + struct kobject *kobj; >> + struct attribute attr; >> +}; >> + >> struct kfd_iolink_properties { >> struct list_head list; >> uint32_t iolink_type; >> @@ -139,6 +158,7 @@ struct kfd_topology_device { >> struct list_head mem_props; >> uint32_t cache_count; >> struct list_head cache_props; >> + struct list_head cache_props_ext; >> struct list_head io_link_props; >> struct list_head p2p_link_props; >> #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED >> @@ -148,6 +168,7 @@ struct kfd_topology_device { >> struct kobject *kobj_node; >> struct kobject *kobj_mem; >> struct kobject *kobj_cache; >> + struct kobject *kobj_cache_ext; >> struct kobject *kobj_iolink; >> struct kobject *kobj_p2plink; >> #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED