Hi, Comments below: On Thu, Dec 10, 2015 at 02:41:21PM -0500, Ashok Raj wrote: > This patch adds basic enumeration, control msr's required to support > Local Machine Check Exception Support (LMCE). > > - Added Local Machine Check definitions, changed MCG_CAP > - Added support for IA32_FEATURE_CONTROL. > - When delivering MCE to guest, we deliver to just a single CPU > when guest OS has opted in to Local delivery. > > Signed-off-by: Ashok Raj <ashok.raj@xxxxxxxxx> > Tested-by: Gong Chen <gong.chen@xxxxxxxxx> > --- > Resending with proper commit message for second patch > > target-i386/cpu.c | 8 ++++++++ > target-i386/cpu.h | 8 ++++++-- > target-i386/kvm.c | 38 +++++++++++++++++++++++++++++++------- > 3 files changed, 45 insertions(+), 9 deletions(-) > > diff --git a/target-i386/cpu.c b/target-i386/cpu.c > index 11e5e39..167669a 100644 > --- a/target-i386/cpu.c > +++ b/target-i386/cpu.c > @@ -2737,6 +2737,13 @@ static void mce_init(X86CPU *cpu) > } > } > > +static void feature_control_init(X86CPU *cpu) > +{ > + CPUX86State *cenv = &cpu->env; > + > + cenv->msr_ia32_feature_control = ((1<<20) | (1<<0)); FEATURE_CONTROL_LOCKED and FEATURE_CONTROL_LMCE macros would make the code clearer. > +} > + > #ifndef CONFIG_USER_ONLY > static void x86_cpu_apic_create(X86CPU *cpu, Error **errp) > { > @@ -2858,6 +2865,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) > #endif > > mce_init(cpu); > + feature_control_init(cpu); > > #ifndef CONFIG_USER_ONLY > if (tcg_enabled()) { > diff --git a/target-i386/cpu.h b/target-i386/cpu.h > index 84edfd0..a567d7a 100644 > --- a/target-i386/cpu.h > +++ b/target-i386/cpu.h > @@ -282,8 +282,9 @@ > > #define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */ > #define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ > +#define MCG_LMCE_P (1ULL<<27) /* Local Machine Check Supported */ Please use spaces instead of tabs. You can detect this and other coding style issues in this patch with checkpatch.pl. > > -#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P) > +#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P|MCG_LMCE_P) This makes mcg_cap change when upgrading QEMU. VMs with MCG_LMCE_P enabled shouldn't be migratable to hosts running older kernels, or the guest may try to read or write MSR_IA32_MCG_EXT_CTL after miration and get a #GP. That means: 1) Older machine-types (pc-2.5 and older) should keep the old (MCG_CTL_P|MCG_SER_P) default. 2) We can't make pc-2.6 enable LMCE by default, either, because QEMU guarantees that just changing the machine-type shouldn't introduce new host requirements (see: http://article.gmane.org/gmane.comp.emulators.qemu/346651) It looks like we need a new -cpu option to enable the feature, then. At least until we raise the minimum kernel version requirements of QEMU. (I didn't review the kvm_mce_inject() changes as I am not familiar with the details of how LMCE is implemented.) > #define MCE_BANKS_DEF 10 > > #define MCG_CAP_BANKS_MASK 0xff > @@ -291,6 +292,7 @@ > #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ > #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ > #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ > +#define MCG_STATUS_LMCE (1ULL<<3) /* Local MCE signaled */ > > #define MCI_STATUS_VAL (1ULL<<63) /* valid error */ > #define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ > @@ -333,6 +335,7 @@ > #define MSR_MCG_CAP 0x179 > #define MSR_MCG_STATUS 0x17a > #define MSR_MCG_CTL 0x17b > +#define MSR_MCG_EXT_CTL 0x4d0 > > #define MSR_P6_EVNTSEL0 0x186 > > @@ -892,7 +895,6 @@ typedef struct CPUX86State { > > uint64_t mcg_status; > uint64_t msr_ia32_misc_enable; > - uint64_t msr_ia32_feature_control; > > uint64_t msr_fixed_ctr_ctrl; > uint64_t msr_global_ctrl; > @@ -977,8 +979,10 @@ typedef struct CPUX86State { > int64_t tsc_khz; > void *kvm_xsave_buf; > > + uint64_t msr_ia32_feature_control; > uint64_t mcg_cap; > uint64_t mcg_ctl; > + uint64_t mcg_ext_ctl; > uint64_t mce_banks[MCE_BANKS_DEF*4]; > > uint64_t tsc_aux; > diff --git a/target-i386/kvm.c b/target-i386/kvm.c > index 6dc9846..c61fe1f 100644 > --- a/target-i386/kvm.c > +++ b/target-i386/kvm.c > @@ -72,6 +72,7 @@ static bool has_msr_tsc_aux; > static bool has_msr_tsc_adjust; > static bool has_msr_tsc_deadline; > static bool has_msr_feature_control; > +static bool has_msr_ext_mcg_ctl; > static bool has_msr_async_pf_en; > static bool has_msr_pv_eoi_en; > static bool has_msr_misc_enable; > @@ -370,18 +371,30 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code) > uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | > MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S; > uint64_t mcg_status = MCG_STATUS_MCIP; > + int flags = 0; > + CPUState *cs = CPU(cpu); > + > + /* > + * We need to read back the value of MSR_EXT_MCG_CTL that was set by the > + * guest kernel back into Qemu > + */ > + cpu_synchronize_state(cs); > + > + flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0; > > if (code == BUS_MCEERR_AR) { > - status |= MCI_STATUS_AR | 0x134; > - mcg_status |= MCG_STATUS_EIPV; > + status |= MCI_STATUS_AR | 0x134; > + mcg_status |= MCG_STATUS_EIPV; > + if (env->mcg_ext_ctl & 0x1) { > + mcg_status |= MCG_STATUS_LMCE; > + flags = 0; /* No Broadcast when LMCE is opted by guest */ > + } > } else { > status |= 0xc0; > mcg_status |= MCG_STATUS_RIPV; > } > cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr, > - (MCM_ADDR_PHYS << 6) | 0xc, > - cpu_x86_support_mca_broadcast(env) ? > - MCE_INJECT_BROADCAST : 0); > + (MCM_ADDR_PHYS << 6) | 0xc, flags); > } > > static void hardware_memory_error(void) > @@ -808,10 +821,14 @@ int kvm_arch_init_vcpu(CPUState *cs) > > c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0); > if (c) { > - has_msr_feature_control = !!(c->ecx & CPUID_EXT_VMX) || > - !!(c->ecx & CPUID_EXT_SMX); > + has_msr_feature_control = !!((c->ecx & CPUID_EXT_VMX) || > + !!(c->ecx & CPUID_EXT_SMX) || > + !!(env->mcg_cap & MCG_LMCE_P)); > } > > + if (has_msr_feature_control && (env->mcg_cap & MCG_LMCE_P)) > + has_msr_ext_mcg_ctl = true; > + > c = cpuid_find_entry(&cpuid_data.cpuid, 0x80000007, 0); > if (c && (c->edx & 1<<8) && invtsc_mig_blocker == NULL) { > /* for migration */ > @@ -1557,6 +1574,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level) > > kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status); > kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl); > + kvm_msr_entry_set(&msrs[n++], MSR_MCG_EXT_CTL, env->mcg_ext_ctl); > for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) { > kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]); > } > @@ -1811,6 +1829,9 @@ static int kvm_get_msrs(X86CPU *cpu) > if (has_msr_feature_control) { > msrs[n++].index = MSR_IA32_FEATURE_CONTROL; > } > + if (has_msr_ext_mcg_ctl) { > + msrs[n++].index = MSR_MCG_EXT_CTL; > + } > if (has_msr_bndcfgs) { > msrs[n++].index = MSR_IA32_BNDCFGS; > } > @@ -1981,6 +2002,9 @@ static int kvm_get_msrs(X86CPU *cpu) > case MSR_IA32_FEATURE_CONTROL: > env->msr_ia32_feature_control = msrs[i].data; > break; > + case MSR_MCG_EXT_CTL: > + env->mcg_ext_ctl = msrs[i].data; > + break; > case MSR_IA32_BNDCFGS: > env->msr_bndcfgs = msrs[i].data; > break; > -- > 2.4.3 > > -- Eduardo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html