From: Ashok Raj <ashok.raj@xxxxxxxxx> On Intel platforms, this patch adds LMCE to KVM MCE supported capabilities and handles guest access to LMCE related MSRs. Signed-off-by: Ashok Raj <ashok.raj@xxxxxxxxx> [Haozhong: macro KVM_MCE_CAP_SUPPORTED => variable kvm_mce_cap_supported Only enable LMCE on Intel platform Check MSR_IA32_FEATURE_CONTROL when handling guest access to MSR_IA32_MCG_EXT_CTL] Signed-off-by: Haozhong Zhang <haozhong.zhang@xxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 5 +++++ arch/x86/kvm/vmx.c | 36 +++++++++++++++++++++++++++++++++++- arch/x86/kvm/x86.c | 15 +++++++++------ 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e0fbe7e..75defa6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -598,6 +598,7 @@ struct kvm_vcpu_arch { u64 mcg_cap; u64 mcg_status; u64 mcg_ctl; + u64 mcg_ext_ctl; u64 *mce_banks; /* Cache MMIO info */ @@ -1005,6 +1006,8 @@ struct kvm_x86_ops { int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); + + void (*setup_mce)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { @@ -1077,6 +1080,8 @@ extern u8 kvm_tsc_scaling_ratio_frac_bits; /* maximum allowed value of TSC scaling ratio */ extern u64 kvm_max_tsc_scaling_ratio; +extern u64 kvm_mce_cap_supported; + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1dc89c5..42db42e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -638,7 +638,7 @@ static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) * feature_control_valid_bits_add/del(), so it's not included here. */ #define FEATURE_CONTROL_MAX_VALID_BITS \ - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX + (FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX | FEATURE_CONTROL_LMCE) static void feature_control_valid_bits_add(struct kvm_vcpu *vcpu, uint64_t bits) { @@ -2905,6 +2905,15 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, return valid_bits && !(val & ~valid_bits); } +static inline bool vmx_mcg_ext_ctrl_msr_present(struct kvm_vcpu *vcpu, + bool host_initiated) +{ + return (vcpu->arch.mcg_cap & MCG_LMCE_P) && + (host_initiated || + (to_vmx(vcpu)->msr_ia32_feature_control & + FEATURE_CONTROL_LMCE)); +} + /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. @@ -2946,6 +2955,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; msr_info->data = vmcs_read64(GUEST_BNDCFGS); break; + case MSR_IA32_MCG_EXT_CTL: + if (!vmx_mcg_ext_ctrl_msr_present(vcpu, + msr_info->host_initiated)) + return 1; + msr_info->data = vcpu->arch.mcg_ext_ctl; + break; case MSR_IA32_FEATURE_CONTROL: if (!vmx_feature_control_msr_valid(vcpu, 0)) return 1; @@ -3039,6 +3054,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC_ADJUST: ret = kvm_set_msr_common(vcpu, msr_info); break; + case MSR_IA32_MCG_EXT_CTL: + if (!vmx_mcg_ext_ctrl_msr_present(vcpu, + msr_info->host_initiated) || + (data & ~MCG_EXT_CTL_LMCE_EN)) + return 1; + vcpu->arch.mcg_ext_ctl = data; + break; case MSR_IA32_FEATURE_CONTROL: if (!vmx_feature_control_msr_valid(vcpu, data) || (to_vmx(vcpu)->msr_ia32_feature_control & @@ -6433,6 +6455,8 @@ static __init int hardware_setup(void) kvm_set_posted_intr_wakeup_handler(wakeup_handler); + kvm_mce_cap_supported |= MCG_LMCE_P; + return alloc_kvm_area(); out8: @@ -10950,6 +10974,14 @@ out: return ret; } +static void vmx_setup_mce(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.mcg_cap & MCG_LMCE_P) + feature_control_valid_bits_add(vcpu, FEATURE_CONTROL_LMCE); + else + feature_control_valid_bits_del(vcpu, FEATURE_CONTROL_LMCE); +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -11074,6 +11106,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .pmu_ops = &intel_pmu_ops, .update_pi_irte = vmx_update_pi_irte, + + .setup_mce = vmx_setup_mce, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bf22721..5bf76ab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -70,7 +70,8 @@ #define MAX_IO_MSRS 256 #define KVM_MAX_MCE_BANKS 32 -#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) +u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P; +EXPORT_SYMBOL_GPL(kvm_mce_cap_supported); #define emul_to_vcpu(ctxt) \ container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) @@ -983,6 +984,7 @@ static u32 emulated_msrs[] = { MSR_IA32_MISC_ENABLE, MSR_IA32_MCG_STATUS, MSR_IA32_MCG_CTL, + MSR_IA32_MCG_EXT_CTL, MSR_IA32_SMBASE, }; @@ -2684,11 +2686,9 @@ long kvm_arch_dev_ioctl(struct file *filp, break; } case KVM_X86_GET_MCE_CAP_SUPPORTED: { - u64 mce_cap; - - mce_cap = KVM_MCE_CAP_SUPPORTED; r = -EFAULT; - if (copy_to_user(argp, &mce_cap, sizeof mce_cap)) + if (copy_to_user(argp, &kvm_mce_cap_supported, + sizeof(kvm_mce_cap_supported))) goto out; r = 0; break; @@ -2866,7 +2866,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, r = -EINVAL; if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) goto out; - if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000)) + if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000)) goto out; r = 0; vcpu->arch.mcg_cap = mcg_cap; @@ -2876,6 +2876,9 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, /* Init IA32_MCi_CTL to all 1s */ for (bank = 0; bank < bank_num; bank++) vcpu->arch.mce_banks[bank*4] = ~(u64)0; + + if (kvm_x86_ops->setup_mce) + kvm_x86_ops->setup_mce(vcpu); out: return r; } -- 2.9.0 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html