> -----Original Message----- > From: Eduardo Habkost [mailto:ehabkost@xxxxxxxxxx] > Sent: Monday, May 7, 2018 4:07 PM > To: Moger, Babu <Babu.Moger@xxxxxxx> > Cc: mst@xxxxxxxxxx; marcel@xxxxxxxxxx; pbonzini@xxxxxxxxxx; > rth@xxxxxxxxxxx; mtosatti@xxxxxxxxxx; geoff@xxxxxxxxxxxxxxx; > kash@xxxxxxxxxxxxxx; qemu-devel@xxxxxxxxxx; kvm@xxxxxxxxxxxxxxx > Subject: Re: [Qemu-devel] [PATCH v7 6/9] i386: Populate AMD Processor > Cache Information for cpuid 0x8000001D > > Hi, > > Sorry for taking so long to send feedback on this series: > > On Thu, Apr 26, 2018 at 11:26:46AM -0500, Babu Moger wrote: > > Add information for cpuid 0x8000001D leaf. Populate cache topology > information > > for different cache types(Data Cache, Instruction Cache, L2 and L3) > supported > > by 0x8000001D leaf. Please refer Processor Programming Reference (PPR) > for AMD > > Family 17h Model for more details. > > > > Signed-off-by: Babu Moger <babu.moger@xxxxxxx> > > Tested-by: Geoffrey McRae <geoff@xxxxxxxxxxxxxxx> > > --- > > target/i386/cpu.c | 92 > +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > > target/i386/kvm.c | 29 ++++++++++++++++-- > > 2 files changed, 118 insertions(+), 3 deletions(-) > > > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > > index 56d2f0b..1024b09 100644 > > --- a/target/i386/cpu.c > > +++ b/target/i386/cpu.c > > @@ -307,6 +307,14 @@ static uint32_t > encode_cache_cpuid80000005(CPUCacheInfo *cache) > > a == ASSOC_FULL ? 0xF : \ > > 0 /* invalid value */) > > > > +/* Definitions used on CPUID Leaf 0x8000001D */ > > +/* Number of logical cores in a complex */ > > +#define CORES_IN_CMPLX 4 > > +/* Number of logical processors sharing cache */ > > +#define NUM_SHARING_CACHE(threads) (threads ? \ > > + (((CORES_IN_CMPLX - 1) * 2) + 1) : \ > > + (CORES_IN_CMPLX - 1)) > > + > > Some questions about these macros: > * Why CORES_IN_CMPLX is a constant, and we're not using > nr_cores? This comes from the hardware design. The maximum cores in core complex is 4. 
The L3 cache can be shared by 8 threads, with 2 threads in each core. The idea here is to mimic the hardware as closely as possible to get the performance benefit. You can look at the datasheet https://www.amd.com/system/files/2017-06/AMD-EPYC-Data-Sheet.pdf and the PPR https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf > * Why "2" is a constant, and we're not using nr_threads? Yes, I can use nr_threads here. Will change it. > * Why it's getting nr_threads-1 as argument instead of > nr_threads? Yes, I can make that change. > > > /* > > * Encode cache info for CPUID[0x80000006].ECX and > CPUID[0x80000006].EDX > > * @l3 can be NULL. > > @@ -336,6 +344,41 @@ static void > encode_cache_cpuid80000006(CPUCacheInfo *l2, > > } > > } > > > > +/* Encode cache info for CPUID[8000001D] */ > > +static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, int > nr_threads, > > + uint32_t *eax, uint32_t *ebx, > > + uint32_t *ecx, uint32_t *edx) > > +{ > > + assert(cache->size == cache->line_size * cache->associativity * > > + cache->partitions * cache->sets); > > + > > + *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) | > > + (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0); > > + > > + /* L3 is shared among multiple cores */ > > + if (cache->level == 3) { > > + *eax |= (NUM_SHARING_CACHE(nr_threads - 1) << 14); > > Isn't it simpler to write this as: > > *eax |= ((nr_cores * nr_threads) - 1) << 14; Ok. Let me try to simplify this whole code segment. > > > Or, even better: > > static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, int > nr_logical_procs, > uint32_t *eax, uint32_t *ebx, > uint32_t *ecx, uint32_t *edx) > { > /* ... */ > /* No need to check cache->level here */ > *eax |= (nr_logical_procs - 1) << 14; > /* ... */ > } > > void cpu_x86_cpuid(...) > { > /* ... 
*/ > case 0x8000001D: > switch (count) { > case 0: /* L1 dcache info */ > /* legacy_cache checks omitted in example for simplicity */ > encode_cache_cpuid8000001d(&env->cache_info.l1d_cache, > cs->nr_threads, > eax, ebx, ecx, edx); > break; > case 1: /* L1 icache info */ > encode_cache_cpuid8000001d(&env->cache_info.l1i_cache, > cs->nr_threads, > eax, ebx, ecx, edx); > break; > case 2: /* L2 cache info */ > encode_cache_cpuid8000001d(&env->cache_info.l2_cache, > cs->nr_threads, > eax, ebx, ecx, edx); > break; > case 3: /* L3 cache info */ > /* L3 is shared among multiple cores */ > encode_cache_cpuid8000001d(&env->cache_info.l3_cache, > cs->nr_threads * cs->nr_cores, > eax, ebx, ecx, edx); > break; > /* ... */ > } > > > > + } else { > > + *eax |= ((nr_threads - 1) << 14); > > + } > > + > > + assert(cache->line_size > 0); > > + assert(cache->partitions > 0); > > + assert(cache->associativity > 0); > > + /* We don't implement fully-associative caches */ > > + assert(cache->associativity < cache->sets); > > + *ebx = (cache->line_size - 1) | > > + ((cache->partitions - 1) << 12) | > > + ((cache->associativity - 1) << 22); > > + > > + assert(cache->sets > 0); > > + *ecx = cache->sets - 1; > > + > > + *edx = (cache->no_invd_sharing ? CACHE_NO_INVD_SHARING : 0) | > > + (cache->inclusive ? CACHE_INCLUSIVE : 0) | > > + (cache->complex_indexing ? 
CACHE_COMPLEX_IDX : 0); > > +} > > + > > /* Definitions of the hardcoded cache entries we expose: */ > > > > /* L1 data cache: */ > > @@ -4013,6 +4056,55 @@ void cpu_x86_cpuid(CPUX86State *env, > uint32_t index, uint32_t count, > > *edx = 0; > > } > > break; > > + case 0x8000001D: > > + *eax = 0; > > + switch (count) { > > + case 0: /* L1 dcache info */ > > + if (env->cache_info.valid && !cpu->legacy_cache) { > > + encode_cache_cpuid8000001d(&env->cache_info.l1d_cache, > > + cs->nr_threads, > > + eax, ebx, ecx, edx); > > + } else { > > + encode_cache_cpuid8000001d(&l1d_cache_amd, cs->nr_threads, > > + eax, ebx, ecx, edx); > > + } > > + break; > > + case 1: /* L1 icache info */ > > + if (env->cache_info.valid && !cpu->legacy_cache) { > > + encode_cache_cpuid8000001d(&env->cache_info.l1i_cache, > > + cs->nr_threads, > > + eax, ebx, ecx, edx); > > + } else { > > + encode_cache_cpuid8000001d(&l1i_cache_amd, > > + cs->nr_threads, > > + eax, ebx, ecx, edx); > > + } > > + break; > > + case 2: /* L2 cache info */ > > + if (env->cache_info.valid && !cpu->legacy_cache) { > > + encode_cache_cpuid8000001d(&env->cache_info.l2_cache, > > + cs->nr_threads, > > + eax, ebx, ecx, edx); > > + } else { > > + encode_cache_cpuid8000001d(&l2_cache_amd, cs->nr_threads, > > + eax, ebx, ecx, edx); > > + } > > + break; > > + case 3: /* L3 cache info */ > > + if (env->cache_info.valid && !cpu->legacy_cache) { > > + encode_cache_cpuid8000001d(&env->cache_info.l3_cache, > > + cs->nr_threads, > > + eax, ebx, ecx, edx); > > + } else { > > + encode_cache_cpuid8000001d(&l3_cache, cs->nr_threads, > > + eax, ebx, ecx, edx); > > + } > > + break; > > + default: /* end of info */ > > + *eax = *ebx = *ecx = *edx = 0; > > + break; > > + } > > + break; > > case 0xC0000000: > > *eax = env->cpuid_xlevel2; > > *ebx = 0; > > diff --git a/target/i386/kvm.c b/target/i386/kvm.c > > index 6c49954..6e66f9c 100644 > > --- a/target/i386/kvm.c > > +++ b/target/i386/kvm.c > > @@ -967,9 +967,32 @@ int 
kvm_arch_init_vcpu(CPUState *cs) > > } > > c = &cpuid_data.entries[cpuid_i++]; > > > > - c->function = i; > > - c->flags = 0; > > - cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); > > + switch (i) { > > + case 0x8000001d: > > + /* Query for all AMD cache information leaves */ > > + for (j = 0; ; j++) { > > + c->function = i; > > + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; > > + c->index = j; > > + cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); > > + > > + if (c->eax == 0) { > > + break; > > + } > > + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { > > + fprintf(stderr, "cpuid_data is full, no space for " > > + "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); > > + abort(); > > + } > > + c = &cpuid_data.entries[cpuid_i++]; > > + } > > + break; > > + default: > > + c->function = i; > > + c->flags = 0; > > + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); > > + break; > > + } > > } > > > > /* Call Centaur's CPUID instructions they are supported. */ > > -- > > 2.7.4 > > > > > > -- > Eduardo