On Mon, Nov 18, 2019 at 10:17 AM Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote:
>
> The current guest mitigation of TAA is both too heavy and not really
> sufficient. It is too heavy because it will cause some affected CPUs
> (those that have MDS_NO but lack TAA_NO) to fall back to VERW and
> get the corresponding slowdown. It is not really sufficient because
> it will cause the MDS_NO bit to disappear upon microcode update, so
> that VMs started before the microcode update will not be runnable
> anymore afterwards, even with tsx=on.
>
> Instead, if tsx=on on the host, we can emulate MSR_IA32_TSX_CTRL for
> the guest and let it run without the VERW mitigation. Even though
> MSR_IA32_TSX_CTRL is quite heavyweight, and we do not want to write
> it on every vmentry, we can use the shared MSR functionality because
> the host kernel need not protect itself from TSX-based side-channels.
>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
> arch/x86/kvm/vmx/vmx.c | 34 +++++++++++++++++++++++++++++++---
> arch/x86/kvm/x86.c | 23 +++++------------------
> 2 files changed, 36 insertions(+), 21 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 04a8212704c1..ed25fe7d5234 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -450,6 +450,7 @@ noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
> MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
> #endif
> MSR_EFER, MSR_TSC_AUX, MSR_STAR,
> + MSR_IA32_TSX_CTRL,
> };
>
> #if IS_ENABLED(CONFIG_HYPERV)
> @@ -1683,6 +1684,9 @@ static void setup_msrs(struct vcpu_vmx *vmx)
> index = __find_msr_index(vmx, MSR_TSC_AUX);
> if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
> move_msr_up(vmx, index, save_nmsrs++);
> + index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL);
> + if (index >= 0)
> + move_msr_up(vmx, index, save_nmsrs++);
>
> vmx->save_nmsrs = save_nmsrs;
> vmx->guest_msrs_ready = false;
> @@ -1782,6 +1786,11 @@ static int vmx_get_msr(struct kvm_vcpu
*vcpu, struct msr_data *msr_info)
> #endif
> case MSR_EFER:
> return kvm_get_msr_common(vcpu, msr_info);
> + case MSR_IA32_TSX_CTRL:
> + if (!msr_info->host_initiated &&
> + !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
> + return 1;
> + goto find_shared_msr;
> case MSR_IA32_UMWAIT_CONTROL:
> if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
> return 1;
> @@ -1884,8 +1893,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> if (!msr_info->host_initiated &&
> !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> return 1;
> - /* Else, falls through */
> + goto find_shared_msr;
> default:
> + find_shared_msr:
> msr = find_msr_entry(vmx, msr_info->index);
> if (msr) {
> msr_info->data = msr->data;
> @@ -2001,6 +2011,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> MSR_IA32_SPEC_CTRL,
> MSR_TYPE_RW);
> break;
> + case MSR_IA32_TSX_CTRL:
> + if (!msr_info->host_initiated &&
> + !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
> + return 1;
> + if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
> + return 1;
> + goto find_shared_msr;
> case MSR_IA32_PRED_CMD:
> if (!msr_info->host_initiated &&
> !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
> @@ -2152,8 +2169,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> /* Check reserved bit, higher 32 bits should be zero */
> if ((data >> 32) != 0)
> return 1;
> - /* Else, falls through */
> + goto find_shared_msr;
> +
> default:
> + find_shared_msr:
> msr = find_msr_entry(vmx, msr_index);
> if (msr) {
> u64 old_msr_data = msr->data;
> @@ -4234,7 +4253,16 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
> continue;
> vmx->guest_msrs[j].index = i;
> vmx->guest_msrs[j].data = 0;
> - vmx->guest_msrs[j].mask = -1ull;
> +
> + switch (index) {
> + case MSR_IA32_TSX_CTRL:
> + /* No need to pass TSX_CTRL_CPUID_CLEAR through.
*/
> + vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
> + break;

Why even bother with the special case here? Does this make the wrmsr faster?

> + default:
> + vmx->guest_msrs[j].mask = -1ull;
> + break;
> + }
> ++vmx->nmsrs;
> }
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 648e84e728fc..fc54e3905fe3 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1314,29 +1314,16 @@ static u64 kvm_get_arch_capabilities(void)
> data |= ARCH_CAP_MDS_NO;
>
> /*
> - * On TAA affected systems, export MDS_NO=0 when:
> - * - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
> - * - Updated microcode is present. This is detected by
> - * the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
> - * that VERW clears CPU buffers.
> - *
> - * When MDS_NO=0 is exported, guests deploy clear CPU buffer
> - * mitigation and don't complain:
> - *
> - * "Vulnerable: Clear CPU buffers attempted, no microcode"
> - *
> - * If TSX is disabled on the system, guests are also mitigated against
> - * TAA and clear CPU buffer mitigation is not required for guests.
> + * On TAA affected systems:
> + * - nothing to do if TSX is disabled on the host.
> + * - we emulate TSX_CTRL if present on the host.
> + * This lets the guest use VERW to clear CPU buffers.
> */
> if (!boot_cpu_has(X86_FEATURE_RTM))
> - data &= ~ARCH_CAP_TAA_NO;
> + data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR);
> else if (!boot_cpu_has_bug(X86_BUG_TAA))
> data |= ARCH_CAP_TAA_NO;
> - else if (data & ARCH_CAP_TSX_CTRL_MSR)
> - data &= ~ARCH_CAP_MDS_NO;
>
> - /* KVM does not emulate MSR_IA32_TSX_CTRL. */
> - data &= ~ARCH_CAP_TSX_CTRL_MSR;
> return data;
> }
> EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
> --
> 1.8.3.1
>
>