On 06/16/16 12:04, Paolo Bonzini wrote: > > > On 16/06/2016 08:05, Haozhong Zhang wrote: > > From: Ashok Raj <ashok.raj@xxxxxxxxx> > > > > On Intel platforms, this patch adds LMCE to KVM MCE supported > > capabilities and handles guest access to LMCE related MSRs. > > > > Signed-off-by: Ashok Raj <ashok.raj@xxxxxxxxx> > > [Haozhong: macro KVM_MCE_CAP_SUPPORTED => variable kvm_mce_cap_supported > > Only enable LMCE on Intel platform > > Check MSR_IA32_FEATURE_CONTROL when handling guest > > access to MSR_IA32_MCG_EXT_CTL] > > Signed-off-by: Haozhong Zhang <haozhong.zhang@xxxxxxxxx> > > --- > > arch/x86/include/asm/kvm_host.h | 5 +++++ > > arch/x86/kvm/vmx.c | 36 +++++++++++++++++++++++++++++++++++- > > arch/x86/kvm/x86.c | 15 +++++++++------ > > 3 files changed, 49 insertions(+), 7 deletions(-) > > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index e0fbe7e..75defa6 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -598,6 +598,7 @@ struct kvm_vcpu_arch { > > u64 mcg_cap; > > u64 mcg_status; > > u64 mcg_ctl; > > + u64 mcg_ext_ctl; > > u64 *mce_banks; > > > > /* Cache MMIO info */ > > @@ -1005,6 +1006,8 @@ struct kvm_x86_ops { > > int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, > > uint32_t guest_irq, bool set); > > void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); > > + > > + void (*setup_mce)(struct kvm_vcpu *vcpu); > > }; > > > > struct kvm_arch_async_pf { > > @@ -1077,6 +1080,8 @@ extern u8 kvm_tsc_scaling_ratio_frac_bits; > > /* maximum allowed value of TSC scaling ratio */ > > extern u64 kvm_max_tsc_scaling_ratio; > > > > +extern u64 kvm_mce_cap_supported; > > + > > enum emulation_result { > > EMULATE_DONE, /* no further processing */ > > EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */ > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > > index 1dc89c5..42db42e 100644 > > --- a/arch/x86/kvm/vmx.c > > +++ b/arch/x86/kvm/vmx.c > > @@ 
-638,7 +638,7 @@ static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) > > * feature_control_valid_bits_add/del(), so it's not included here. > > */ > > #define FEATURE_CONTROL_MAX_VALID_BITS \ > > - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX > > + (FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX | FEATURE_CONTROL_LMCE) > > > > static void feature_control_valid_bits_add(struct kvm_vcpu *vcpu, uint64_t bits) > > { > > @@ -2905,6 +2905,15 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, > > return valid_bits && !(val & ~valid_bits); > > } > > > > +static inline bool vmx_mcg_ext_ctrl_msr_present(struct kvm_vcpu *vcpu, > > + bool host_initiated) > > +{ > > + return (vcpu->arch.mcg_cap & MCG_LMCE_P) && > > Checking MCG_LMCE_P is unnecessary, because you cannot set > FEATURE_CONTROL_LMCE unless MCG_LMCE_P is present. > > You can just inline this function in the callers, it's simpler. I'll remove the first line check and inline the other parts. > > > + (host_initiated || > > + (to_vmx(vcpu)->msr_ia32_feature_control & > > + FEATURE_CONTROL_LMCE)); > > +} > > + > > /* > > * Reads an msr value (of 'msr_index') into 'pdata'. > > * Returns 0 on success, non-0 otherwise. 
> > @@ -2946,6 +2955,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > > return 1; > > msr_info->data = vmcs_read64(GUEST_BNDCFGS); > > break; > > + case MSR_IA32_MCG_EXT_CTL: > > + if (!vmx_mcg_ext_ctrl_msr_present(vcpu, > > + msr_info->host_initiated)) > > + return 1; > > + msr_info->data = vcpu->arch.mcg_ext_ctl; > > + break; > > case MSR_IA32_FEATURE_CONTROL: > > if (!vmx_feature_control_msr_valid(vcpu, 0)) > > return 1; > > @@ -3039,6 +3054,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > > case MSR_IA32_TSC_ADJUST: > > ret = kvm_set_msr_common(vcpu, msr_info); > > break; > > + case MSR_IA32_MCG_EXT_CTL: > > + if (!vmx_mcg_ext_ctrl_msr_present(vcpu, > > + msr_info->host_initiated) || > > + (data & ~MCG_EXT_CTL_LMCE_EN)) > > + return 1; > > + vcpu->arch.mcg_ext_ctl = data; > > + break; > > case MSR_IA32_FEATURE_CONTROL: > > if (!vmx_feature_control_msr_valid(vcpu, data) || > > (to_vmx(vcpu)->msr_ia32_feature_control & > > @@ -6433,6 +6455,8 @@ static __init int hardware_setup(void) > > > > kvm_set_posted_intr_wakeup_handler(wakeup_handler); > > > > + kvm_mce_cap_supported |= MCG_LMCE_P; > > Ah, so virtual LMCE is available on all processors! This is > interesting, but it also makes it more complicated to handle in QEMU; a > new QEMU generally doesn't require a new kernel. > My QEMU patch checks KVM MCE capabilities before enabling LMCE (see lmce_supported() and mce_init() in QEMU patch 1), so either a new QEMU on an old kernel or an old QEMU on a new kernel will work. Haozhong > Eduardo, any ideas? 
> > Thanks, > > Paolo > > > return alloc_kvm_area(); > > > > out8: > > @@ -10950,6 +10974,14 @@ out: > > return ret; > > } > > > > +static void vmx_setup_mce(struct kvm_vcpu *vcpu) > > +{ > > + if (vcpu->arch.mcg_cap & MCG_LMCE_P) > > + feature_control_valid_bits_add(vcpu, FEATURE_CONTROL_LMCE); > > + else > > + feature_control_valid_bits_del(vcpu, FEATURE_CONTROL_LMCE); > > +} > > + > > static struct kvm_x86_ops vmx_x86_ops = { > > .cpu_has_kvm_support = cpu_has_kvm_support, > > .disabled_by_bios = vmx_disabled_by_bios, > > @@ -11074,6 +11106,8 @@ static struct kvm_x86_ops vmx_x86_ops = { > > .pmu_ops = &intel_pmu_ops, > > > > .update_pi_irte = vmx_update_pi_irte, > > + > > + .setup_mce = vmx_setup_mce, > > }; > > > > static int __init vmx_init(void) > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index bf22721..5bf76ab 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -70,7 +70,8 @@ > > > > #define MAX_IO_MSRS 256 > > #define KVM_MAX_MCE_BANKS 32 > > -#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) > > +u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P; > > +EXPORT_SYMBOL_GPL(kvm_mce_cap_supported); > > > > #define emul_to_vcpu(ctxt) \ > > container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) > > @@ -983,6 +984,7 @@ static u32 emulated_msrs[] = { > > MSR_IA32_MISC_ENABLE, > > MSR_IA32_MCG_STATUS, > > MSR_IA32_MCG_CTL, > > + MSR_IA32_MCG_EXT_CTL, > > MSR_IA32_SMBASE, > > }; > > > > @@ -2684,11 +2686,9 @@ long kvm_arch_dev_ioctl(struct file *filp, > > break; > > } > > case KVM_X86_GET_MCE_CAP_SUPPORTED: { > > - u64 mce_cap; > > - > > - mce_cap = KVM_MCE_CAP_SUPPORTED; > > r = -EFAULT; > > - if (copy_to_user(argp, &mce_cap, sizeof mce_cap)) > > + if (copy_to_user(argp, &kvm_mce_cap_supported, > > + sizeof(kvm_mce_cap_supported))) > > goto out; > > r = 0; > > break; > > @@ -2866,7 +2866,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, > > r = -EINVAL; > > if (!bank_num || bank_num >= 
KVM_MAX_MCE_BANKS) > > goto out; > > - if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000)) > > + if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000)) > > goto out; > > r = 0; > > vcpu->arch.mcg_cap = mcg_cap; > > @@ -2876,6 +2876,9 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, > > /* Init IA32_MCi_CTL to all 1s */ > > for (bank = 0; bank < bank_num; bank++) > > vcpu->arch.mce_banks[bank*4] = ~(u64)0; > > + > > + if (kvm_x86_ops->setup_mce) > > + kvm_x86_ops->setup_mce(vcpu); > > out: > > return r; > > } > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html