On Mon, Jan 08, 2024 at 04:27:19PM +0800, Zhao Liu wrote: > From: Zhao Liu <zhao1.liu@xxxxxxxxx> > > Linux kernel (from v6.4, with commit edc0a2b595765 ("x86/topology: Fix > erroneous smp_num_siblings on Intel Hybrid platforms") is able to > handle platforms with Module level enumerated via CPUID.1F. > > Expose the module level in CPUID[0x1F] if the machine has more than 1 > modules. > > (Tested CPU topology in CPUID[0x1F] leaf with various die/cluster > configurations in "-smp".) > > Signed-off-by: Zhao Liu <zhao1.liu@xxxxxxxxx> > Tested-by: Babu Moger <babu.moger@xxxxxxx> > Tested-by: Yongwei Ma <yongwei.ma@xxxxxxxxx> > Acked-by: Michael S. Tsirkin <mst@xxxxxxxxxx> > --- > Changes since v3: > * New patch to expose module level in 0x1F. > * Add Tested-by tag from Yongwei. > --- > target/i386/cpu.c | 12 +++++++++++- > target/i386/cpu.h | 2 ++ > target/i386/kvm/kvm.c | 2 +- > 3 files changed, 14 insertions(+), 2 deletions(-) > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > index 294ca6b8947a..a2d39d2198b6 100644 > --- a/target/i386/cpu.c > +++ b/target/i386/cpu.c > @@ -277,6 +277,8 @@ static uint32_t num_cpus_by_topo_level(X86CPUTopoInfo *topo_info, > return 1; > case CPU_TOPO_LEVEL_CORE: > return topo_info->threads_per_core; > + case CPU_TOPO_LEVEL_MODULE: > + return topo_info->threads_per_core * topo_info->cores_per_module; > case CPU_TOPO_LEVEL_DIE: > return topo_info->threads_per_core * topo_info->cores_per_module * > topo_info->modules_per_die; > @@ -297,6 +299,8 @@ static uint32_t apicid_offset_by_topo_level(X86CPUTopoInfo *topo_info, > return 0; > case CPU_TOPO_LEVEL_CORE: > return apicid_core_offset(topo_info); > + case CPU_TOPO_LEVEL_MODULE: > + return apicid_module_offset(topo_info); > case CPU_TOPO_LEVEL_DIE: > return apicid_die_offset(topo_info); > case CPU_TOPO_LEVEL_PACKAGE: > @@ -316,6 +320,8 @@ static uint32_t cpuid1f_topo_type(enum CPUTopoLevel topo_level) > return CPUID_1F_ECX_TOPO_LEVEL_SMT; > case CPU_TOPO_LEVEL_CORE: > return 
CPUID_1F_ECX_TOPO_LEVEL_CORE; > + case CPU_TOPO_LEVEL_MODULE: > + return CPUID_1F_ECX_TOPO_LEVEL_MODULE; > case CPU_TOPO_LEVEL_DIE: > return CPUID_1F_ECX_TOPO_LEVEL_DIE; > default: > @@ -347,6 +353,10 @@ static void encode_topo_cpuid1f(CPUX86State *env, uint32_t count, > if (env->nr_dies > 1) { > set_bit(CPU_TOPO_LEVEL_DIE, topo_bitmap); > } > + > + if (env->nr_modules > 1) { > + set_bit(CPU_TOPO_LEVEL_MODULE, topo_bitmap); > + } > } > > *ecx = count & 0xff; > @@ -6394,7 +6404,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, > break; > case 0x1F: > /* V2 Extended Topology Enumeration Leaf */ > - if (topo_info.dies_per_pkg < 2) { > + if (topo_info.modules_per_die < 2 && topo_info.dies_per_pkg < 2) { A question: is the original check necessary? Leaf 0x1f exists even on CPUs without module/die topology on bare metal; I tried on EMR: // leaf 0 0x00000000 0x00: eax=0x00000020 ebx=0x756e6547 ecx=0x6c65746e edx=0x49656e69 // leaf 0x1f 0x0000001f 0x00: eax=0x00000001 ebx=0x00000002 ecx=0x00000100 edx=0x00000004 0x0000001f 0x01: eax=0x00000007 ebx=0x00000080 ecx=0x00000201 edx=0x00000004 0x0000001f 0x02: eax=0x00000000 ebx=0x00000000 ecx=0x00000002 edx=0x00000004 // leaf 0xb 0x0000000b 0x00: eax=0x00000001 ebx=0x00000002 ecx=0x00000100 edx=0x00000004 0x0000000b 0x01: eax=0x00000007 ebx=0x00000080 ecx=0x00000201 edx=0x00000004 0x0000000b 0x02: eax=0x00000000 ebx=0x00000000 ecx=0x00000002 edx=0x00000004 So this check leads to CPU behavior that differs from bare metal, even in the case of "-cpu host". In SDM Vol. 2, CPUID instruction section: " CPUID leaf 1FH is a preferred superset to leaf 0BH. Intel recommends using leaf 1FH when available rather than leaf 0BH and ensuring that any leaf 0BH algorithms are updated to support leaf 1FH. " My understanding: if 0x1f exists (leaf 0.eax >= 0x1f), then it should report the same values at the lp/core levels as 0xb. 
> *eax = *ebx = *ecx = *edx = 0; > break; > } > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > index eecd30bde92b..97b290e10576 100644 > --- a/target/i386/cpu.h > +++ b/target/i386/cpu.h > @@ -1018,6 +1018,7 @@ enum CPUTopoLevel { > CPU_TOPO_LEVEL_INVALID, > CPU_TOPO_LEVEL_SMT, > CPU_TOPO_LEVEL_CORE, > + CPU_TOPO_LEVEL_MODULE, > CPU_TOPO_LEVEL_DIE, > CPU_TOPO_LEVEL_PACKAGE, > CPU_TOPO_LEVEL_MAX, > @@ -1032,6 +1033,7 @@ enum CPUTopoLevel { > #define CPUID_1F_ECX_TOPO_LEVEL_INVALID CPUID_B_ECX_TOPO_LEVEL_INVALID > #define CPUID_1F_ECX_TOPO_LEVEL_SMT CPUID_B_ECX_TOPO_LEVEL_SMT > #define CPUID_1F_ECX_TOPO_LEVEL_CORE CPUID_B_ECX_TOPO_LEVEL_CORE > +#define CPUID_1F_ECX_TOPO_LEVEL_MODULE 3 > #define CPUID_1F_ECX_TOPO_LEVEL_DIE 5 > > /* MSR Feature Bits */ > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c > index 4ce80555b45c..e5ddb214cb36 100644 > --- a/target/i386/kvm/kvm.c > +++ b/target/i386/kvm/kvm.c > @@ -1913,7 +1913,7 @@ int kvm_arch_init_vcpu(CPUState *cs) > break; > } > case 0x1f: > - if (env->nr_dies < 2) { > + if (env->nr_modules < 2 && env->nr_dies < 2) { > break; > } > /* fallthrough */ > -- > 2.34.1 > >