On Mon, Jan 29, 2018 at 4:10 PM, KarimAllah Ahmed <karahmed@xxxxxxxxx> wrote:
> From: Ashok Raj <ashok.raj@xxxxxxxxx>
>
> Add MSR passthrough for MSR_IA32_PRED_CMD and place branch predictor
> barriers on switching between VMs to avoid inter VM Spectre-v2 attacks.
>
> [peterz: rebase and changelog rewrite]
> [karahmed: - rebase
>            - vmx: expose PRED_CMD whenever it is available
>            - svm: only pass through IBPB if it is available
>            - vmx: support !cpu_has_vmx_msr_bitmap()]
> [dwmw2: Expose CPUID bit too (AMD IBPB only for now as we lack IBRS)
>         PRED_CMD is a write-only MSR]
>
> Cc: Asit Mallick <asit.k.mallick@xxxxxxxxx>
> Cc: Dave Hansen <dave.hansen@xxxxxxxxx>
> Cc: Arjan Van De Ven <arjan.van.de.ven@xxxxxxxxx>
> Cc: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
> Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
> Cc: Jun Nakajima <jun.nakajima@xxxxxxxxx>
> Cc: Andy Lutomirski <luto@xxxxxxxxxx>
> Cc: Greg KH <gregkh@xxxxxxxxxxxxxxxxxxx>
> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Signed-off-by: Ashok Raj <ashok.raj@xxxxxxxxx>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@xxxxxxxxx
> Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
> Signed-off-by: KarimAllah Ahmed <karahmed@xxxxxxxxx>
> ---
>  arch/x86/kvm/cpuid.c | 11 ++++++++++-
>  arch/x86/kvm/svm.c   | 14 ++++++++++++++
>  arch/x86/kvm/vmx.c   | 12 ++++++++++++
>  3 files changed, 36 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index c0eb337..033004d 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -365,6 +365,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>                 F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
>                 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
>
> +       /* cpuid 0x80000008.ebx */
> +       const u32 kvm_cpuid_8000_0008_ebx_x86_features =
> +               F(IBPB);
> +
>         /* cpuid 0xC0000001.edx */
>         const u32 kvm_cpuid_C000_0001_edx_x86_features =
>                 F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
> @@ -625,7 +629,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>                 if (!g_phys_as)
>                         g_phys_as = phys_as;
>                 entry->eax = g_phys_as | (virt_as << 8);
> -               entry->ebx = entry->edx = 0;
> +               entry->edx = 0;
> +               /* IBPB isn't necessarily present in hardware cpuid */
> +               if (boot_cpu_has(X86_FEATURE_IBPB))
> +                       entry->ebx |= F(IBPB);
> +               entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
> +               cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
>                 break;
>         }
>         case 0x80000019:
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 2744b973..c886e46 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -529,6 +529,7 @@ struct svm_cpu_data {
>         struct kvm_ldttss_desc *tss_desc;
>
>         struct page *save_area;
> +       struct vmcb *current_vmcb;
>  };
>
>  static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
> @@ -918,6 +919,9 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
>
>                 set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
>         }
> +
> +       if (boot_cpu_has(X86_FEATURE_IBPB))
> +               set_msr_interception(msrpm, MSR_IA32_PRED_CMD, 1, 1);
>  }
>
>  static void add_msr_offset(u32 offset)
> @@ -1706,11 +1710,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
>         __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
>         kvm_vcpu_uninit(vcpu);
>         kmem_cache_free(kvm_vcpu_cache, svm);
> +       /*
> +        * The vmcb page can be recycled, causing a false negative in
> +        * svm_vcpu_load(). So do a full IBPB now.
> +        */
> +       indirect_branch_prediction_barrier();
>  }
>
>  static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  {
>         struct vcpu_svm *svm = to_svm(vcpu);
> +       struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
>         int i;
>
>         if (unlikely(cpu != vcpu->cpu)) {
> @@ -1739,6 +1749,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>         if (static_cpu_has(X86_FEATURE_RDTSCP))
>                 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
>
> +       if (sd->current_vmcb != svm->vmcb) {
> +               sd->current_vmcb = svm->vmcb;
> +               indirect_branch_prediction_barrier();
> +       }
>         avic_vcpu_load(vcpu, cpu);
>  }
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index aa8638a..ea278ce 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2272,6 +2272,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>         if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
>                 per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
>                 vmcs_load(vmx->loaded_vmcs->vmcs);
> +               indirect_branch_prediction_barrier();
>         }
>
>         if (!already_loaded) {
> @@ -3330,6 +3331,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>         case MSR_IA32_TSC:
>                 kvm_write_tsc(vcpu, msr_info);
>                 break;
> +       case MSR_IA32_PRED_CMD:
> +               if (!msr_info->host_initiated &&
> +                   !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
> +                       return 1;
> +
> +               if (data & PRED_CMD_IBPB)
> +                       wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
> +               break;
>         case MSR_IA32_CR_PAT:
>                 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
>                         if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
> @@ -9548,6 +9557,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>                 goto free_msrs;
>
>         msr_bitmap = vmx->vmcs01.msr_bitmap;
> +
> +       if (boot_cpu_has(X86_FEATURE_IBPB))
> +               vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_W);
>         vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
>         vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
>         vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
> --
> 2.7.4
>

Are you planning to allow L2 to write MSR_IA32_PRED_CMD without L0
intercepting it, if the MSR write intercept is disabled in both the
vmcs01 MSR permission bitmap and the vmcs12 MSR permission bitmap?
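For concreteness, a rough sketch of what I have in mind, assuming the
existing nested_vmx_merge_msr_bitmap() / nested_vmx_disable_intercept_for_msr()
helpers and their msr_bitmap_l1/msr_bitmap_l0 arguments (untested, just
to illustrate the question). Since nested_vmx_disable_intercept_for_msr()
only clears a bit in the merged bitmap when the vmcs12 bitmap also leaves
that MSR unintercepted, something like this in nested_vmx_merge_msr_bitmap()
would give L2 direct write access exactly when both L0 and L1 allow it:

        /*
         * Sketch: let L2 write PRED_CMD without a vmexit when the
         * guest has IBPB and L1's vmcs12 bitmap also leaves the
         * write intercept clear; the helper clears the bit in the
         * merged bitmap only if vmcs12's bitmap has it clear.
         */
        if (guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
                nested_vmx_disable_intercept_for_msr(msr_bitmap_l1,
                                                     msr_bitmap_l0,
                                                     MSR_IA32_PRED_CMD,
                                                     MSR_TYPE_W);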