Re: [PATCH v9 24/27] KVM: x86: Enable CET virtualization for VMX and advertise to userspace

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Jan 23, 2024 at 06:41:57PM -0800, Yang Weijiang wrote:
>Expose CET features to guest if KVM/host can support them, clear CPUID
>feature bits if KVM/host cannot support.
>
>Set CPUID feature bits so that CET features are available in guest CPUID.
>Add CR4.CET bit support in order to allow guest set CET master control
>bit.
>
>Disable KVM CET feature if unrestricted_guest is unsupported/disabled as
>KVM does not support emulating CET.
>
>The CET load-bits in VM_ENTRY/VM_EXIT control fields should be set to make
>guest CET xstates isolated from host's.
>
>On platforms with VMX_BASIC[bit56] == 0, inject #CP at VMX entry with error
>code will fail, and if VMX_BASIC[bit56] == 1, #CP injection with or without
>error code is allowed. Disable CET feature bits if the MSR bit is cleared
>so that nested VMM can inject #CP if and only if VMX_BASIC[bit56] == 1.
>
>Don't expose CET feature if either of {U,S}_CET xstate bits is cleared
>in host XSS or if XSAVES isn't supported.
>
>CET MSR contents after reset, power-up and INIT are set to 0s, clears the
>guest fpstate fields so that the guest MSRs are reset to 0s after the events.
>
>Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx>
>Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
>---
> arch/x86/include/asm/kvm_host.h  |  2 +-
> arch/x86/include/asm/msr-index.h |  1 +
> arch/x86/kvm/cpuid.c             | 25 ++++++++++++++++++++-----
> arch/x86/kvm/vmx/capabilities.h  |  6 ++++++
> arch/x86/kvm/vmx/vmx.c           | 28 +++++++++++++++++++++++++++-
> arch/x86/kvm/vmx/vmx.h           |  6 ++++--
> arch/x86/kvm/x86.c               | 31 +++++++++++++++++++++++++++++--
> arch/x86/kvm/x86.h               |  3 +++
> 8 files changed, 91 insertions(+), 11 deletions(-)
>
>diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>index 6efaaaa15945..161d0552be5f 100644
>--- a/arch/x86/include/asm/kvm_host.h
>+++ b/arch/x86/include/asm/kvm_host.h
>@@ -134,7 +134,7 @@
> 			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
> 			  | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
> 			  | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
>-			  | X86_CR4_LAM_SUP))
>+			  | X86_CR4_LAM_SUP | X86_CR4_CET))
> 
> #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
> 
>diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
>index 1d51e1850ed0..233e00c01e62 100644
>--- a/arch/x86/include/asm/msr-index.h
>+++ b/arch/x86/include/asm/msr-index.h
>@@ -1102,6 +1102,7 @@
> #define VMX_BASIC_MEM_TYPE_MASK	0x003c000000000000LLU
> #define VMX_BASIC_MEM_TYPE_WB	6LLU
> #define VMX_BASIC_INOUT		0x0040000000000000LLU
>+#define VMX_BASIC_NO_HW_ERROR_CODE_CC	0x0100000000000000LLU
> 
> /* Resctrl MSRs: */
> /* - Intel: */
>diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>index 95233b0879a3..fddc54991cd4 100644
>--- a/arch/x86/kvm/cpuid.c
>+++ b/arch/x86/kvm/cpuid.c
>@@ -150,14 +150,14 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
> 			return -EINVAL;
> 	}
> 	/*
>-	 * Prevent 32-bit guest launch if shadow stack is exposed as SSP
>-	 * state is not defined for 32-bit SMRAM.
>+	 * CET is not supported for 32-bit guest, prevent guest launch if
>+	 * shadow stack or IBT is enabled for 32-bit guest.
> 	 */
> 	best = cpuid_entry2_find(entries, nent, 0x80000001,
> 				 KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> 	if (best && !(best->edx & F(LM))) {
> 		best = cpuid_entry2_find(entries, nent, 0x7, 0);
>-		if (best && (best->ecx & F(SHSTK)))
>+		if (best && ((best->ecx & F(SHSTK)) || (best->edx & F(IBT))))

IBT has nothing to do with SSP. why bother to do this?

> 			return -EINVAL;
> 	}
> 
>@@ -665,7 +665,7 @@ void kvm_set_cpu_caps(void)
> 		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
> 		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
> 		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
>-		F(SGX_LC) | F(BUS_LOCK_DETECT)
>+		F(SGX_LC) | F(BUS_LOCK_DETECT) | F(SHSTK)
> 	);
> 	/* Set LA57 based on hardware capability. */
> 	if (cpuid_ecx(7) & F(LA57))
>@@ -683,7 +683,8 @@ void kvm_set_cpu_caps(void)
> 		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
> 		F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
> 		F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) |
>-		F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D)
>+		F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D) |
>+		F(IBT)
> 	);
> 
> 	/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
>@@ -696,6 +697,20 @@ void kvm_set_cpu_caps(void)
> 		kvm_cpu_cap_set(X86_FEATURE_INTEL_STIBP);
> 	if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
> 		kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
>+	/*
>+	 * Don't use boot_cpu_has() to check availability of IBT because the
>+	 * feature bit is cleared in boot_cpu_data when ibt=off is applied
>+	 * in host cmdline.
>+	 *
>+	 * As currently there's no HW bug which requires disabling IBT feature
>+	 * while CPU can enumerate it, host cmdline option ibt=off is most
>+	 * likely due to administrative reason on host side, so KVM refers to
>+	 * CPU CPUID enumeration to enable the feature. In future if there's
>+	 * actually some bug clobbered ibt=off option, then enforce additional
>+	 * check here to disable the support in KVM.
>+	 */
>+	if (cpuid_edx(7) & F(IBT))
>+		kvm_cpu_cap_set(X86_FEATURE_IBT);

This can be done in a separate patch.

And we don't know whether IBT is cleared due to ibt=off. It could be due to
lack of IBT on some CPUs; advertising IBT in this case is incorrect.

> 
> 	kvm_cpu_cap_mask(CPUID_7_1_EAX,
> 		F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) |
>diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
>index ee8938818c8a..e12bc233d88b 100644
>--- a/arch/x86/kvm/vmx/capabilities.h
>+++ b/arch/x86/kvm/vmx/capabilities.h
>@@ -79,6 +79,12 @@ static inline bool cpu_has_vmx_basic_inout(void)
> 	return	(((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT);
> }
> 
>+static inline bool cpu_has_vmx_basic_no_hw_errcode(void)
>+{
>+	return	((u64)vmcs_config.basic_cap << 32) &
>+		 VMX_BASIC_NO_HW_ERROR_CODE_CC;
>+}
>+
> static inline bool cpu_has_virtual_nmis(void)
> {
> 	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS &&
>diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>index 3993afbacd51..ef7aca954228 100644
>--- a/arch/x86/kvm/vmx/vmx.c
>+++ b/arch/x86/kvm/vmx/vmx.c
>@@ -2609,6 +2609,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
> 		{ VM_ENTRY_LOAD_IA32_EFER,		VM_EXIT_LOAD_IA32_EFER },
> 		{ VM_ENTRY_LOAD_BNDCFGS,		VM_EXIT_CLEAR_BNDCFGS },
> 		{ VM_ENTRY_LOAD_IA32_RTIT_CTL,		VM_EXIT_CLEAR_IA32_RTIT_CTL },
>+		{ VM_ENTRY_LOAD_CET_STATE,		VM_EXIT_LOAD_CET_STATE },
> 	};
> 
> 	memset(vmcs_conf, 0, sizeof(*vmcs_conf));
>@@ -4934,6 +4935,14 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> 
> 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  /* 22.2.1 */
> 
>+	if (kvm_cpu_cap_has(X86_FEATURE_SHSTK))
..
>+		vmcs_writel(GUEST_SSP, 0);
>+	if (kvm_cpu_cap_has(X86_FEATURE_SHSTK) ||
>+	    kvm_cpu_cap_has(X86_FEATURE_IBT))
>+		vmcs_writel(GUEST_S_CET, 0);
>+	if (kvm_cpu_cap_has(X86_FEATURE_SHSTK))
>+		vmcs_writel(GUEST_INTR_SSP_TABLE, 0);

At least this can be merged with the first if-statement.

how about:
	if (kvm_cpu_cap_has(X86_FEATURE_SHSTK)) {
		vmcs_writel(GUEST_SSP, 0);
		vmcs_writel(GUEST_S_CET, 0);
		vmcs_writel(GUEST_INTR_SSP_TABLE, 0);
	} else if (kvm_cpu_cap_has(X86_FEATURE_IBT)) {
		vmcs_writel(GUEST_S_CET, 0);
	}

> 	kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
> 
> 	vpid_sync_context(vmx->vpid);
>@@ -6353,6 +6362,12 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
> 	if (vmcs_read32(VM_EXIT_MSR_STORE_COUNT) > 0)
> 		vmx_dump_msrs("guest autostore", &vmx->msr_autostore.guest);
> 
>+	if (vmentry_ctl & VM_ENTRY_LOAD_CET_STATE) {
>+		pr_err("S_CET = 0x%016lx\n", vmcs_readl(GUEST_S_CET));
>+		pr_err("SSP = 0x%016lx\n", vmcs_readl(GUEST_SSP));
>+		pr_err("INTR SSP TABLE = 0x%016lx\n",
>+		       vmcs_readl(GUEST_INTR_SSP_TABLE));

how about merging them into one line?

>+	}
> 	pr_err("*** Host State ***\n");
> 	pr_err("RIP = 0x%016lx  RSP = 0x%016lx\n",
> 	       vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
>@@ -6430,6 +6445,12 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
> 	if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
> 		pr_err("Virtual processor ID = 0x%04x\n",
> 		       vmcs_read16(VIRTUAL_PROCESSOR_ID));
>+	if (vmexit_ctl & VM_EXIT_LOAD_CET_STATE) {
>+		pr_err("S_CET = 0x%016lx\n", vmcs_readl(HOST_S_CET));
>+		pr_err("SSP = 0x%016lx\n", vmcs_readl(HOST_SSP));
>+		pr_err("INTR SSP TABLE = 0x%016lx\n",
>+		       vmcs_readl(HOST_INTR_SSP_TABLE));

ditto.

>+	}
> }
> 
> /*
>@@ -7965,7 +7986,6 @@ static __init void vmx_set_cpu_caps(void)
> 		kvm_cpu_cap_set(X86_FEATURE_UMIP);
> 
> 	/* CPUID 0xD.1 */
>-	kvm_caps.supported_xss = 0;
> 	if (!cpu_has_vmx_xsaves())
> 		kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
> 
>@@ -7977,6 +7997,12 @@ static __init void vmx_set_cpu_caps(void)
> 
> 	if (cpu_has_vmx_waitpkg())
> 		kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
>+
>+	if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest ||
>+	    !cpu_has_vmx_basic_no_hw_errcode()) {

Can you add a comment here? This way, readers won't need to dig through git
history to understand the reason.

>+		kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
>+		kvm_cpu_cap_clear(X86_FEATURE_IBT);
>+	}
> }
> 
> static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
>diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
>index e3b0985bb74a..d0cad2624564 100644
>--- a/arch/x86/kvm/vmx/vmx.h
>+++ b/arch/x86/kvm/vmx/vmx.h
>@@ -484,7 +484,8 @@ static inline u8 vmx_get_rvi(void)
> 	 VM_ENTRY_LOAD_IA32_EFER |					\
> 	 VM_ENTRY_LOAD_BNDCFGS |					\
> 	 VM_ENTRY_PT_CONCEAL_PIP |					\
>-	 VM_ENTRY_LOAD_IA32_RTIT_CTL)
>+	 VM_ENTRY_LOAD_IA32_RTIT_CTL |					\
>+	 VM_ENTRY_LOAD_CET_STATE)
> 
> #define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS				\
> 	(VM_EXIT_SAVE_DEBUG_CONTROLS |					\
>@@ -506,7 +507,8 @@ static inline u8 vmx_get_rvi(void)
> 	       VM_EXIT_LOAD_IA32_EFER |					\
> 	       VM_EXIT_CLEAR_BNDCFGS |					\
> 	       VM_EXIT_PT_CONCEAL_PIP |					\
>-	       VM_EXIT_CLEAR_IA32_RTIT_CTL)
>+	       VM_EXIT_CLEAR_IA32_RTIT_CTL |				\
>+	       VM_EXIT_LOAD_CET_STATE)
> 
> #define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL			\
> 	(PIN_BASED_EXT_INTR_MASK |					\
>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>index 9596763fae8d..eb531823447a 100644
>--- a/arch/x86/kvm/x86.c
>+++ b/arch/x86/kvm/x86.c
>@@ -231,7 +231,8 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs;
> 				| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
> 				| XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)
> 
>-#define KVM_SUPPORTED_XSS     0
>+#define KVM_SUPPORTED_XSS	(XFEATURE_MASK_CET_USER | \
>+				 XFEATURE_MASK_CET_KERNEL)
> 
> u64 __read_mostly host_efer;
> EXPORT_SYMBOL_GPL(host_efer);
>@@ -9921,6 +9922,20 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
> 	if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
> 		kvm_caps.supported_xss = 0;
> 
>+	if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
>+	    !kvm_cpu_cap_has(X86_FEATURE_IBT))
>+		kvm_caps.supported_xss &= ~(XFEATURE_MASK_CET_USER |
>+					    XFEATURE_MASK_CET_KERNEL);
>+


>+	if ((kvm_caps.supported_xss & (XFEATURE_MASK_CET_USER |
>+	     XFEATURE_MASK_CET_KERNEL)) !=
>+	    (XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL)) {
>+		kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
>+		kvm_cpu_cap_clear(X86_FEATURE_IBT);
>+		kvm_caps.supported_xss &= ~(XFEATURE_MASK_CET_USER |
>+					    XFEATURE_MASK_CET_KERNEL);
>+	}

I am not sure why this is necessary. Could you please explain?




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux