This series allows guests to use the MSR_IA32_SPEC_CTRL and
MSR_IA32_PRED_CMD model specific registers that were added as mitigations
for CVE-2017-5715.

These are only the KVM specific parts of the fix.  It does *not* yet
include any protection for reading host memory from the guest, because
that would be done in the same way as the rest of Linux.  So there is no
IBRS *usage* here, no retpolines, no stuffing of the return stack buffer.
(KVM already includes a fix to clear all registers on vmexit, which is
enough to block Google Project Zero's PoC exploit).

However, I am including the changes to use IBPB (indirect branch
predictor barrier) if available.  That occurs only when there is a VCPU
switch on a physical CPU, thus it has a small impact on performance.

The patches are a bit hackish because the relevant cpufeatures have not
been included yet, and because I wanted to make the patches easier to
backport to distro kernels if desired, but I would still like to have
them in 4.16.

Please review.  The interdiff from v1 is at the end of this cover letter.

Thanks,

Paolo

Paolo Bonzini (6):
  KVM: x86: add SPEC_CTRL and IBPB_SUPPORT accessors
  x86/msr: add definitions for indirect branch predictor MSRs
  kvm: vmx: pass MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD down to the
    guest
  KVM: SVM: fix comment
  kvm: svm: pass MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD down to guest
  KVM: x86: add SPEC_CTRL and IBPB_SUPPORT to MSR and CPUID lists

Tim Chen (1):
  kvm: vmx: Set IBPB when running a different VCPU

Tom Lendacky (1):
  x86/svm: Set IBPB when running a different VCPU

 arch/x86/include/asm/msr-index.h |  9 ++++-
 arch/x86/kvm/cpuid.c             | 27 ++++++++++---
 arch/x86/kvm/cpuid.h             | 22 +++++++++++
 arch/x86/kvm/svm.c               | 83 +++++++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/vmx.c               | 59 ++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c               |  1 +
 6 files changed, 193 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ec08f1d8d39b..828a03425571 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -39,11 +39,6 @@
 
 /* Intel MSRs. Some also available on other CPUs */
 
-#define MSR_IA32_SPEC_CTRL		0x00000048
-
-#define MSR_IA32_PRED_CMD		0x00000049
-#define FEATURE_SET_IBPB		(1UL << 0)
-
 #define MSR_PPIN_CTL			0x0000004e
 #define MSR_PPIN			0x0000004f
 
@@ -469,8 +464,15 @@
 #define MSR_SMI_COUNT			0x00000034
 #define MSR_IA32_FEATURE_CONTROL	0x0000003a
 #define MSR_IA32_TSC_ADJUST		0x0000003b
+
+#define MSR_IA32_SPEC_CTRL		0x00000048
+#define SPEC_CTRL_FEATURE_DISABLE_IBRS	(0 << 0)
+#define SPEC_CTRL_FEATURE_ENABLE_IBRS	(1 << 0)
+
+#define MSR_IA32_PRED_CMD		0x00000049
+#define PRED_CMD_IBPB			(1UL << 0)
+
 #define MSR_IA32_BNDCFGS		0x00000d90
-
 #define MSR_IA32_BNDCFGS_RSVD		0x00000ffc
 
 #define MSR_IA32_XSS			0x00000da0
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index dd744d896cec..c249d5f599e4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1738,7 +1738,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	 * svm_vcpu_load; block speculative execution.
 	 */
 	if (have_ibpb_support)
-		wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+		wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
 }
 
 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1776,7 +1776,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (sd->current_vmcb != svm->vmcb) {
 		sd->current_vmcb = svm->vmcb;
 		if (have_ibpb_support)
-			wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+			wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
 	}
 
 	avic_vcpu_load(vcpu, cpu);
@@ -3648,6 +3648,10 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = svm->nested.vm_cr_msr;
 		break;
 	case MSR_IA32_SPEC_CTRL:
+		if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+		    (!msr_info->host_initiated &&
+		     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+			return 1;
 		msr_info->data = svm->spec_ctrl;
 		break;
 	case MSR_IA32_UCODE_REV:
@@ -3806,6 +3810,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
 		break;
 	case MSR_IA32_SPEC_CTRL:
+		if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+		    (!msr->host_initiated &&
+		     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+			return 1;
 		svm->spec_ctrl = data;
 		break;
 	case MSR_IA32_APICBASE:
@@ -4996,6 +5004,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	local_irq_enable();
 
+	/*
+	 * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
+	 * before vmentry.
+	 */
 	if (have_spec_ctrl && svm->spec_ctrl != 0)
 		wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
 
@@ -5077,6 +5089,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		if (svm->spec_ctrl != 0)
 			wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 	}
+	/*
+	 * Speculative execution past the above wrmsrl might encounter
+	 * an indirect branch and use guest-controlled contents of the
+	 * indirect branch predictor; block it.
+	 */
+	asm("lfence");
 
 #ifdef CONFIG_X86_64
 	wrmsrl(MSR_GS_BASE, svm->host.gs_base);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bf127c570675..ef2681fa568a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2376,7 +2376,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
 		vmcs_load(vmx->loaded_vmcs->vmcs);
 		if (have_spec_ctrl)
-			wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+			wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
 	}
 
 	if (!already_loaded) {
@@ -3347,6 +3347,7 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
  */
 static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct shared_msr_entry *msr;
 
 	switch (msr_info->index) {
@@ -3358,8 +3359,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = vmcs_readl(GUEST_GS_BASE);
 		break;
 	case MSR_KERNEL_GS_BASE:
-		vmx_load_host_state(to_vmx(vcpu));
-		msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
+		vmx_load_host_state(vmx);
+		msr_info->data = vmx->msr_guest_kernel_gs_base;
 		break;
 #endif
 	case MSR_EFER:
@@ -3368,7 +3369,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = guest_read_tsc(vcpu);
 		break;
 	case MSR_IA32_SPEC_CTRL:
-		msr_info->data = to_vmx(vcpu)->spec_ctrl;
+		if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+		    (!msr_info->host_initiated &&
+		     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+			return 1;
+		msr_info->data = to_vmx(vcpu)->spec_ctrl;
 		break;
 	case MSR_IA32_SYSENTER_CS:
 		msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
@@ -3388,13 +3393,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_IA32_MCG_EXT_CTL:
 		if (!msr_info->host_initiated &&
-		    !(to_vmx(vcpu)->msr_ia32_feature_control &
+		    !(vmx->msr_ia32_feature_control &
 		      FEATURE_CONTROL_LMCE))
 			return 1;
 		msr_info->data = vcpu->arch.mcg_ext_ctl;
 		break;
 	case MSR_IA32_FEATURE_CONTROL:
-		msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control;
+		msr_info->data = vmx->msr_ia32_feature_control;
 		break;
 	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
 		if (!nested_vmx_allowed(vcpu))
@@ -3443,7 +3448,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		/* Otherwise falls through */
 	default:
-		msr = find_msr_entry(to_vmx(vcpu), msr_info->index);
+		msr = find_msr_entry(vmx, msr_info->index);
 		if (msr) {
 			msr_info->data = msr->data;
 			break;
@@ -3510,7 +3515,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		kvm_write_tsc(vcpu, msr_info);
 		break;
 	case MSR_IA32_SPEC_CTRL:
-		to_vmx(vcpu)->spec_ctrl = msr_info->data;
+		if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+		    (!msr_info->host_initiated &&
+		     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+			return 1;
+		to_vmx(vcpu)->spec_ctrl = data;
 		break;
 	case MSR_IA32_CR_PAT:
 		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
@@ -4046,7 +4046,7 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 	 * vmx_vcpu_load; block speculative execution.
 	 */
 	if (have_spec_ctrl)
-		wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+		wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
 }
 
 static void vmx_nested_free_vmcs02(struct vcpu_vmx *vmx)
@@ -9629,13 +9638,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	pt_guest_enter(vmx);
 
-	if (have_spec_ctrl && vmx->spec_ctrl != 0)
-		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
-
 	atomic_switch_perf_msrs(vmx);
 
 	vmx_arm_hv_timer(vcpu);
 
+	/*
+	 * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
+	 * before vmentry.
+	 */
+	if (have_spec_ctrl && vmx->spec_ctrl != 0)
+		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
 		/* Store host registers */
@@ -9744,9 +9757,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	if (have_spec_ctrl) {
 		rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
-		if (vmx->spec_ctrl)
+		if (vmx->spec_ctrl != 0)
 			wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 	}
+	/*
+	 * Speculative execution past the above wrmsrl might encounter
+	 * an indirect branch and use guest-controlled contents of the
+	 * indirect branch predictor; block it.
+	 */
+	asm("lfence");
 
 	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
 	if (vmx->host_debugctlmsr)
-- 
1.8.3.1
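
For reviewers who want a concrete picture of what the series enables on
the guest side, here is a minimal, hypothetical sketch (not part of the
series): a guest kernel issuing an IBPB and toggling IBRS through the
newly exposed MSRs.  The X86_FEATURE_SPEC_CTRL cpufeature name and the
example function are assumptions, since, as noted above, the relevant
cpufeature bits are not included yet.

#include <asm/cpufeature.h>
#include <asm/msr.h>

/*
 * Hypothetical guest-side usage.  It relies on the MSR definitions added
 * in this series and on the SPEC_CTRL CPUID bit being made visible to
 * the guest ("KVM: x86: add SPEC_CTRL and IBPB_SUPPORT to MSR and CPUID
 * lists").
 */
static void example_guest_spec_ctrl_usage(void)
{
	if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL))
		return;

	/* Flush indirect branch prediction state (IBPB). */
	wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);

	/* Restrict indirect branch speculation (IBRS) around sensitive code. */
	wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_FEATURE_ENABLE_IBRS);
	/* ... sensitive code ... */
	wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_FEATURE_DISABLE_IBRS);
}

Both wrmsrs above trap to KVM today; with these patches they are instead
handled via the spec_ctrl fields shown in the interdiff.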