Subject should be something like "Enable CET virtualization", or maybe
move the CPUID changes to a separate final patch?

On Fri, Dec 27, 2019 at 10:11:33AM +0800, Yang Weijiang wrote:
> There are two different places storing guest CET states: states
> managed with XSAVES/XRSTORS, as saved/restored in the previous
> patch, can be read/written directly from/to the MSRs, while those
> stored in VMCS fields are accessed via vmcs_read/vmcs_write.
>
> Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |   3 +-
>  arch/x86/kvm/cpuid.c            |   5 +-
>  arch/x86/kvm/vmx/vmx.c          | 138 ++++++++++++++++++++++++++++++++
>  arch/x86/kvm/x86.c              |  11 +++
>  4 files changed, 154 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 64bf379381e4..34140462084f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -90,7 +90,8 @@
>  			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
>  			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
>  			  | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
> -			  | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
> +			  | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
> +			  | X86_CR4_CET))
>
>  #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 126a31b99823..4414bd110f3c 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -385,13 +385,14 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
>  		F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
>  		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
>  		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
> -		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
> +		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | F(SHSTK) |
> +		0 /*WAITPKG*/;
>
>  	/* cpuid 7.0.edx*/
>  	const u32 kvm_cpuid_7_0_edx_x86_features =
>  		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
>  		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
> -		F(MD_CLEAR);
> +		F(MD_CLEAR) | F(IBT);
>
>  	/* cpuid 7.1.eax */
>  	const u32 kvm_cpuid_7_1_eax_x86_features =
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 0a75b65d03f0..52ac67604026 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1763,6 +1763,96 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
>  	return 0;
>  }
>
> +#define CET_MSR_RSVD_BITS_1	0x3
> +#define CET_MSR_RSVD_BITS_2	(0xF << 6)

Would it make sense to use GENMASK?
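If so, a sketch of what that could look like (GENMASK() comes from
<linux/bits.h>; the bit positions below are simply read off the masks
above, 0x3 being bits 1:0 and 0xF << 6 being bits 9:6):

/* Same values as the open-coded masks above: bits 1:0 and bits 9:6. */
#define CET_MSR_RSVD_BITS_1	GENMASK(1, 0)
#define CET_MSR_RSVD_BITS_2	GENMASK(9, 6)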
> +static bool cet_ssp_write_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> +{
> +	u64 data = msr->data;
> +	u32 high_word = data >> 32;
> +
> +	if (is_64_bit_mode(vcpu)) {
> +		if (data & CET_MSR_RSVD_BITS_1)

This looks odd.  I assume it should look more like cet_ctl_write_allowed()?
E.g.

	if (data & CET_MSR_RSVD_BITS_1)
		return false;

	if (!is_64_bit_mode(vcpu) && high_word)
		return false;

> +			return false;
> +	} else if (high_word) {
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +static bool cet_ctl_write_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> +{
> +	u64 data = msr->data;
> +	u32 high_word = data >> 32;
> +
> +	if (data & CET_MSR_RSVD_BITS_2)
> +		return false;
> +
> +	if (!is_64_bit_mode(vcpu) && high_word)
> +		return false;
> +
> +	return true;
> +}
> +
> +static bool cet_ssp_access_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> +{
> +	u64 kvm_xss;
> +	u32 index = msr->index;
> +
> +	if (is_guest_mode(vcpu))

Hmm, this seems wrong, e.g. shouldn't WRMSR be allowed if L1 passes the MSR
to L2, which is the only way to reach this, if I'm not mistaken?

> +		return false;
> +
> +	if (!boot_cpu_has(X86_FEATURE_SHSTK))
> +		return false;
> +
> +	if (!msr->host_initiated &&
> +	    !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK))
> +		return false;
> +
> +	if (index == MSR_IA32_INT_SSP_TAB)
> +		return true;
> +
> +	kvm_xss = kvm_supported_xss();
> +
> +	if (index == MSR_IA32_PL3_SSP) {
> +		if (!(kvm_xss & XFEATURE_MASK_CET_USER))
> +			return false;
> +	} else if (!(kvm_xss & XFEATURE_MASK_CET_KERNEL)) {
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +static bool cet_ctl_access_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> +{
> +	u64 kvm_xss;
> +	u32 index = msr->index;
> +
> +	if (is_guest_mode(vcpu))
> +		return false;
> +
> +	kvm_xss = kvm_supported_xss();
> +
> +	if (!boot_cpu_has(X86_FEATURE_SHSTK) &&
> +	    !boot_cpu_has(X86_FEATURE_IBT))
> +		return false;
> +
> +	if (!msr->host_initiated &&
> +	    !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK) &&
> +	    !guest_cpuid_has(vcpu, X86_FEATURE_IBT))
> +		return false;
> +
> +	if (index == MSR_IA32_U_CET) {
> +		if (!(kvm_xss & XFEATURE_MASK_CET_USER))
> +			return false;
> +	} else if (!(kvm_xss & XFEATURE_MASK_CET_KERNEL)) {
> +		return false;
> +	}
> +
> +	return true;
> +}
>  /*
>   * Reads an msr value (of 'msr_index') into 'pdata'.
>   * Returns 0 on success, non-0 otherwise.
> @@ -1886,6 +1976,26 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		else
>  			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
>  		break;
> +	case MSR_IA32_S_CET:
> +		if (!cet_ctl_access_allowed(vcpu, msr_info))
> +			return 1;
> +		msr_info->data = vmcs_readl(GUEST_S_CET);
> +		break;
> +	case MSR_IA32_INT_SSP_TAB:
> +		if (!cet_ssp_access_allowed(vcpu, msr_info))
> +			return 1;
> +		msr_info->data = vmcs_readl(GUEST_INTR_SSP_TABLE);
> +		break;
> +	case MSR_IA32_U_CET:
> +		if (!cet_ctl_access_allowed(vcpu, msr_info))
> +			return 1;
> +		rdmsrl(MSR_IA32_U_CET, msr_info->data);
> +		break;
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +		if (!cet_ssp_access_allowed(vcpu, msr_info))
> +			return 1;
> +		rdmsrl(msr_info->index, msr_info->data);

Ugh, thought of another problem.  If a SoftIRQ runs after an IRQ it can
load the kernel FPU state.  So for all the XSAVES MSRs we'll need a helper
similar to vmx_write_guest_kernel_gs_base(), except XSAVES has to be even
more restrictive and disable IRQs entirely.  E.g.

static void vmx_get_xsave_msr(struct msr_data *msr_info)
{
	local_irq_disable();
	if (test_thread_flag(TIF_NEED_FPU_LOAD))
		switch_fpu_return();
	rdmsrl(msr_info->index, msr_info->data);
	local_irq_enable();
}
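The RDMSR cases above would then call the helper instead of a raw
rdmsrl(), e.g. (sketch only, with vmx_get_xsave_msr() being the
hypothetical helper from above):

	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
		if (!cet_ssp_access_allowed(vcpu, msr_info))
			return 1;
		/* reload guest FPU state if needed, with IRQs off */
		vmx_get_xsave_msr(msr_info);
		break;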
> +		break;
>  	case MSR_TSC_AUX:
>  		if (!msr_info->host_initiated &&
>  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> @@ -2147,6 +2257,34 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		else
>  			vmx->pt_desc.guest.addr_a[index / 2] = data;
>  		break;
> +	case MSR_IA32_S_CET:
> +		if (!cet_ctl_access_allowed(vcpu, msr_info))
> +			return 1;
> +		if (!cet_ctl_write_allowed(vcpu, msr_info))
> +			return 1;
> +		vmcs_writel(GUEST_S_CET, data);
> +		break;
> +	case MSR_IA32_INT_SSP_TAB:
> +		if (!cet_ctl_access_allowed(vcpu, msr_info))
> +			return 1;
> +		if (!is_64_bit_mode(vcpu))
> +			return 1;
> +		vmcs_writel(GUEST_INTR_SSP_TABLE, data);
> +		break;
> +	case MSR_IA32_U_CET:
> +		if (!cet_ctl_access_allowed(vcpu, msr_info))
> +			return 1;
> +		if (!cet_ctl_write_allowed(vcpu, msr_info))
> +			return 1;
> +		wrmsrl(MSR_IA32_U_CET, data);
> +		break;
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +		if (!cet_ssp_access_allowed(vcpu, msr_info))
> +			return 1;
> +		if (!cet_ssp_write_allowed(vcpu, msr_info))
> +			return 1;
> +		wrmsrl(msr_info->index, data);
> +		break;
>  	case MSR_TSC_AUX:
>  		if (!msr_info->host_initiated &&
>  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 6dbe77365b22..7de6faa6aa51 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1186,6 +1186,10 @@ static const u32 msrs_to_save_all[] = {
>  	MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
>  	MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
>  	MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
> +
> +	MSR_IA32_XSS, MSR_IA32_U_CET, MSR_IA32_S_CET,
> +	MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP,
> +	MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB,
>  };
>
>  static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
> @@ -1468,6 +1472,13 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
>  		 * invokes 64-bit SYSENTER.
>  		 */
>  		data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
> +		break;
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +	case MSR_IA32_U_CET:
> +	case MSR_IA32_S_CET:
> +	case MSR_IA32_INT_SSP_TAB:
> +		if (is_noncanonical_address(data, vcpu))
> +			return 1;
>  	}
>
>  	msr.data = data;
> --
> 2.17.2
>
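One more thought on the XSAVES MSRs: WRMSR has the same TIF_NEED_FPU_LOAD
problem, so the wrmsrl() calls in vmx_set_msr() would presumably want a
mirror-image helper as well.  Rough sketch, with vmx_set_xsave_msr() being
a hypothetical name for the write side:

/* Hypothetical write-side twin of vmx_get_xsave_msr() above. */
static void vmx_set_xsave_msr(struct msr_data *msr_info)
{
	local_irq_disable();
	if (test_thread_flag(TIF_NEED_FPU_LOAD))
		switch_fpu_return();
	wrmsrl(msr_info->index, msr_info->data);
	local_irq_enable();
}

E.g. the MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP case in vmx_set_msr() would
then do vmx_set_xsave_msr(msr_info) instead of a raw wrmsrl().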