On 1/29/2018 6:10 PM, KarimAllah Ahmed wrote:
> From: Ashok Raj <ashok.raj@xxxxxxxxx>
>
> Add MSR passthrough for MSR_IA32_PRED_CMD and place branch predictor
> barriers on switching between VMs to avoid inter VM Spectre-v2 attacks.
>
> [peterz: rebase and changelog rewrite]
> [karahmed: - rebase
>            - vmx: expose PRED_CMD whenever it is available
>            - svm: only pass through IBPB if it is available
>            - vmx: support !cpu_has_vmx_msr_bitmap()]
> [dwmw2: Expose CPUID bit too (AMD IBPB only for now as we lack IBRS)
>         PRED_CMD is a write-only MSR]
>
> Cc: Asit Mallick <asit.k.mallick@xxxxxxxxx>
> Cc: Dave Hansen <dave.hansen@xxxxxxxxx>
> Cc: Arjan Van De Ven <arjan.van.de.ven@xxxxxxxxx>
> Cc: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
> Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
> Cc: Jun Nakajima <jun.nakajima@xxxxxxxxx>
> Cc: Andy Lutomirski <luto@xxxxxxxxxx>
> Cc: Greg KH <gregkh@xxxxxxxxxxxxxxxxxxx>
> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Signed-off-by: Ashok Raj <ashok.raj@xxxxxxxxx>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@xxxxxxxxx
> Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
> Signed-off-by: KarimAllah Ahmed <karahmed@xxxxxxxxx>
> ---
>  arch/x86/kvm/cpuid.c | 11 ++++++++++-
>  arch/x86/kvm/svm.c   | 14 ++++++++++++++
>  arch/x86/kvm/vmx.c   | 12 ++++++++++++
>  3 files changed, 36 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index c0eb337..033004d 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -365,6 +365,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>  		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
>  		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
>
> +	/* cpuid 0x80000008.ebx */
> +	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
> +		F(IBPB);
> +
>  	/* cpuid 0xC0000001.edx */
>  	const u32 kvm_cpuid_C000_0001_edx_x86_features =
>  		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
> @@ -625,7 +629,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>  		if (!g_phys_as)
>  			g_phys_as = phys_as;
>  		entry->eax = g_phys_as | (virt_as << 8);
> -		entry->ebx = entry->edx = 0;
> +		entry->edx = 0;
> +		/* IBPB isn't necessarily present in hardware cpuid */
> +		if (boot_cpu_has(X86_FEATURE_IBPB))
> +			entry->ebx |= F(IBPB);
> +		entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
> +		cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
>  		break;
>  	}
>  	case 0x80000019:
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 2744b973..c886e46 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -529,6 +529,7 @@ struct svm_cpu_data {
>  	struct kvm_ldttss_desc *tss_desc;
>
>  	struct page *save_area;
> +	struct vmcb *current_vmcb;
>  };
>
>  static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
> @@ -918,6 +919,9 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
>
>  		set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
>  	}
> +
> +	if (boot_cpu_has(X86_FEATURE_IBPB))
> +		set_msr_interception(msrpm, MSR_IA32_PRED_CMD, 1, 1);

Not sure you really need the check here. If the feature isn't available
in the hardware, then it won't be advertised in the CPUID bits to the
guest, so the guest shouldn't try to write to the msr. If it does, it
will #GP. So I would think it could be set all the time to not be
intercepted, no?
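I.e. something like the below (untested sketch, just to illustrate
dropping the guard; the 1, 1 arguments mean no read/write intercept,
same as in the direct_access_msrs loop above):

	/*
	 * Pass PRED_CMD through unconditionally: a guest that doesn't
	 * see IBPB in its CPUID shouldn't be writing the MSR, and if
	 * it does anyway the write just #GPs in the guest.
	 */
	set_msr_interception(msrpm, MSR_IA32_PRED_CMD, 1, 1);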
>  }
>
>  static void add_msr_offset(u32 offset)
> @@ -1706,11 +1710,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
>  	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
>  	kvm_vcpu_uninit(vcpu);
>  	kmem_cache_free(kvm_vcpu_cache, svm);
> +	/*
> +	 * The vmcb page can be recycled, causing a false negative in
> +	 * svm_vcpu_load(). So do a full IBPB now.
> +	 */
> +	indirect_branch_prediction_barrier();
>  }
>
>  static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> +	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
>  	int i;
>
>  	if (unlikely(cpu != vcpu->cpu)) {
> @@ -1739,6 +1749,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  	if (static_cpu_has(X86_FEATURE_RDTSCP))
>  		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
>
> +	if (sd->current_vmcb != svm->vmcb) {
> +		sd->current_vmcb = svm->vmcb;
> +		indirect_branch_prediction_barrier();
> +	}
>  	avic_vcpu_load(vcpu, cpu);
>  }
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index aa8638a..ea278ce 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2272,6 +2272,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
>  		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
>  		vmcs_load(vmx->loaded_vmcs->vmcs);
> +		indirect_branch_prediction_barrier();
>  	}
>
>  	if (!already_loaded) {
> @@ -3330,6 +3331,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_IA32_TSC:
>  		kvm_write_tsc(vcpu, msr_info);
>  		break;
> +	case MSR_IA32_PRED_CMD:
> +		if (!msr_info->host_initiated &&
> +		    !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
> +			return 1;
> +
> +		if (data & PRED_CMD_IBPB)
> +			wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
> +		break;

Should this also be in svm.c or as common code in x86.c?

>  	case MSR_IA32_CR_PAT:
>  		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
>  			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
> @@ -9548,6 +9557,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  		goto free_msrs;
>
>  	msr_bitmap = vmx->vmcs01.msr_bitmap;
> +
> +	if (boot_cpu_has(X86_FEATURE_IBPB))
> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_W);

Same comment here as in svm.c, is the feature check necessary?

Thanks,
Tom

>  	vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
>  	vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
>  	vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
>
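P.S. On the svm.c/x86.c question above: one untested sketch of the
common-code variant would be to lift the vmx_set_msr() hunk as-is into
a case in kvm_set_msr_common() in x86.c, which both vmx_set_msr() and
svm_set_msr() already fall back to for MSRs they don't handle
themselves:

	case MSR_IA32_PRED_CMD:
		if (!msr_info->host_initiated &&
		    !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
			return 1;

		/* PRED_CMD is write-only; only the IBPB bit is defined */
		if (data & PRED_CMD_IBPB)
			wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
		break;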