[RFC PATCH v3 01/10] KVM: VMX: Virtualize Intel IA32_SPEC_CTRL

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Daniel Sneddon <daniel.sneddon@xxxxxxxxxxxxxxx>

Currently KVM disables interception of IA32_SPEC_CTRL after a non-0 is
written to IA32_SPEC_CTRL by guest. The guest is allowed to write any
value directly to hardware. There is a tertiary control for
IA32_SPEC_CTRL. This control allows for bits in IA32_SPEC_CTRL to be
masked to prevent guests from changing those bits.

Add controls setting the mask for IA32_SPEC_CTRL and desired value for
masked bits.

These new controls are especially helpful for protecting guests that
don't know about BHI_DIS_S and that are running on hardware that
supports it. This allows the hypervisor to set BHI_DIS_S to fully
protect the guest.

Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx>
Signed-off-by: Daniel Sneddon <daniel.sneddon@xxxxxxxxxxxxxxx>
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@xxxxxxxxxxxxxxx>
[ add a new ioctl to report supported bits. Fix the inverted check ]
Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
---
 Documentation/virt/kvm/api.rst     | 39 +++++++++++++++++
 arch/x86/include/asm/kvm_host.h    |  4 ++
 arch/x86/include/asm/vmx.h         |  5 +++
 arch/x86/include/asm/vmxfeatures.h |  2 +
 arch/x86/kvm/vmx/capabilities.h    |  5 +++
 arch/x86/kvm/vmx/vmx.c             | 68 +++++++++++++++++++++++++++---
 arch/x86/kvm/vmx/vmx.h             |  3 +-
 arch/x86/kvm/x86.c                 | 30 +++++++++++++
 arch/x86/kvm/x86.h                 |  1 +
 include/uapi/linux/kvm.h           |  4 ++
 10 files changed, 155 insertions(+), 6 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 0b5a33ee71ee..b6eeb1d6eb65 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6352,6 +6352,19 @@ a single guest_memfd file, but the bound ranges must not overlap).
 
 See KVM_SET_USER_MEMORY_REGION2 for additional details.
 
+4.143 KVM_GET_SUPPORTED_FORCE_SPEC_CTRL
+---------------------------------------
+
+:Capability: KVM_CAP_FORCE_SPEC_CTRL
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: u64 supported_bitmask (out)
+:Returns: 0 on success, -EFAULT if supported_bitmap cannot be accessed
+
+Returns a bitmask of SPEC_CTRL MSR bits which can be forced on. All bits can be
+forced to 0 (i.e., prevent guest from setting it) even if KVM doesn't support
+the bit.
+
 5. The kvm_run structure
 ========================
 
@@ -8063,6 +8076,32 @@ error/annotated fault.
 
 See KVM_EXIT_MEMORY_FAULT for more information.
 
+7.35 KVM_CAP_FORCE_SPEC_CTRL
+----------------------------
+
+:Architectures: x86
+:Parameters: args[0] contains the bitmask to prevent guests from modifying those
+	     bits
+	     args[1] contains the desired value to set in IA32_SPEC_CTRL for the
+	     masked bits
+:Returns: 0 on success, -EINVAL if args[0] or args[1] contain invalid values
+
+This capability allows userspace to configure the value of IA32_SPEC_CTRL and
+what bits the VM can and cannot access. This is especially useful when a VM is
+migrated to newer hardware with hardware based speculation mitigations not
+provided to the VM previously.
+
+IA32_SPEC_CTRL virtualization works by introducing the IA32_SPEC_CTRL shadow
+and mask fields. When a guest writes to IA32_SPEC_CTRL when it is virtualized
+the value written is:
+
+(GUEST_WRMSR_VAL & ~MASK) | (REAL_MSR_VAL & MASK).
+
+No bit that is masked can be modified by the guest.
+
+The shadow field contains the value the guest wrote to the MSR and is what is
+returned to the guest when the virtualized MSR is read.
+
 8. Other capabilities.
 ======================
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 16e07a2eee19..8220414cf697 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1404,6 +1404,10 @@ struct kvm_arch {
 
 	u32 notify_window;
 	u32 notify_vmexit_flags;
+
+	u64 force_spec_ctrl_mask;
+	u64 force_spec_ctrl_value;
+
 	/*
 	 * If exit_on_emulation_error is set, and the in-kernel instruction
 	 * emulator fails to emulate an instruction, allow userspace
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 4dba17363008..f65651a3898c 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -84,6 +84,7 @@
  * Definitions of Tertiary Processor-Based VM-Execution Controls.
  */
 #define TERTIARY_EXEC_IPI_VIRT			VMCS_CONTROL_BIT(IPI_VIRT)
+#define TERTIARY_EXEC_SPEC_CTRL_SHADOW		VMCS_CONTROL_BIT(SPEC_CTRL_SHADOW)
 
 #define PIN_BASED_EXT_INTR_MASK                 VMCS_CONTROL_BIT(INTR_EXITING)
 #define PIN_BASED_NMI_EXITING                   VMCS_CONTROL_BIT(NMI_EXITING)
@@ -236,6 +237,10 @@ enum vmcs_field {
 	TERTIARY_VM_EXEC_CONTROL_HIGH	= 0x00002035,
 	PID_POINTER_TABLE		= 0x00002042,
 	PID_POINTER_TABLE_HIGH		= 0x00002043,
+	IA32_SPEC_CTRL_MASK             = 0x0000204A,
+	IA32_SPEC_CTRL_MASK_HIGH        = 0x0000204B,
+	IA32_SPEC_CTRL_SHADOW           = 0x0000204C,
+	IA32_SPEC_CTRL_SHADOW_HIGH      = 0x0000204D,
 	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
 	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
 	VMCS_LINK_POINTER               = 0x00002800,
diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h
index 266daf5b5b84..6dbfe9004d92 100644
--- a/arch/x86/include/asm/vmxfeatures.h
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -90,4 +90,6 @@
 
 /* Tertiary Processor-Based VM-Execution Controls, word 3 */
 #define VMX_FEATURE_IPI_VIRT		( 3*32+  4) /* Enable IPI virtualization */
+#define VMX_FEATURE_SPEC_CTRL_SHADOW	( 3*32+  7) /* IA32_SPEC_CTRL shadow */
+
 #endif /* _ASM_X86_VMXFEATURES_H */
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 41a4533f9989..6c51a5abb16b 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -138,6 +138,11 @@ static inline bool cpu_has_tertiary_exec_ctrls(void)
 		CPU_BASED_ACTIVATE_TERTIARY_CONTROLS;
 }
 
+static inline bool cpu_has_spec_ctrl_shadow(void)
+{
+	return vmcs_config.cpu_based_3rd_exec_ctrl & TERTIARY_EXEC_SPEC_CTRL_SHADOW;
+}
+
 static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
 {
 	return vmcs_config.cpu_based_2nd_exec_ctrl &
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c37a89eda90f..a6154d725025 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2008,7 +2008,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		    !guest_has_spec_ctrl_msr(vcpu))
 			return 1;
 
-		msr_info->data = to_vmx(vcpu)->spec_ctrl;
+		if (cpu_has_spec_ctrl_shadow())
+			msr_info->data = vmcs_read64(IA32_SPEC_CTRL_SHADOW);
+		else
+			msr_info->data = to_vmx(vcpu)->spec_ctrl;
 		break;
 	case MSR_IA32_SYSENTER_CS:
 		msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
@@ -2148,6 +2151,19 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated
 	return debugctl;
 }
 
+static void vmx_set_spec_ctrl(struct kvm_vcpu *vcpu, u64 val)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	vmx->spec_ctrl = val;
+
+	if (cpu_has_spec_ctrl_shadow()) {
+		vmcs_write64(IA32_SPEC_CTRL_SHADOW, val);
+
+		vmx->spec_ctrl |= vcpu->kvm->arch.force_spec_ctrl_value;
+	}
+}
+
 /*
  * Writes msr value into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -2273,7 +2289,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (kvm_spec_ctrl_test_value(data))
 			return 1;
 
-		vmx->spec_ctrl = data;
+		vmx_set_spec_ctrl(vcpu, data);
+
 		if (!data)
 			break;
 
@@ -4785,6 +4802,23 @@ static void init_vmcs(struct vcpu_vmx *vmx)
 	if (cpu_has_vmx_xsaves())
 		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
 
+	if (cpu_has_spec_ctrl_shadow()) {
+		vmcs_write64(IA32_SPEC_CTRL_SHADOW, 0);
+
+		/*
+		 * Note, IA32_SPEC_CTRL_{SHADOW,MASK} subtly behave *very*
+		 * differently than other shadow+mask combinations.  Attempts
+		 * to modify bits in MASK are silently ignored and do NOT cause
+		 * a VM-Exit.  This allows the host to force bits to be set or
+		 * cleared on behalf of the guest, while still allowing the
+		 * guest modify other bits at will, without triggering VM-Exits.
+		 */
+		if (kvm->arch.force_spec_ctrl_mask)
+			vmcs_write64(IA32_SPEC_CTRL_MASK, kvm->arch.force_spec_ctrl_mask);
+		else
+			vmcs_write64(IA32_SPEC_CTRL_MASK, 0);
+	}
+
 	if (enable_pml) {
 		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
 		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
@@ -4853,7 +4887,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 		__vmx_vcpu_reset(vcpu);
 
 	vmx->rmode.vm86_active = 0;
-	vmx->spec_ctrl = 0;
+	vmx_set_spec_ctrl(vcpu, 0);
 
 	vmx->msr_ia32_umwait_control = 0;
 
@@ -7211,8 +7245,14 @@ void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
 	if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
 		return;
 
-	if (flags & VMX_RUN_SAVE_SPEC_CTRL)
-		vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
+	if (flags & VMX_RUN_SAVE_SPEC_CTRL) {
+		if (cpu_has_spec_ctrl_shadow())
+			vmx->spec_ctrl = (vmcs_read64(IA32_SPEC_CTRL_SHADOW) &
+					~vmx->vcpu.kvm->arch.force_spec_ctrl_mask) |
+					 vmx->vcpu.kvm->arch.force_spec_ctrl_value;
+		else
+			vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
+	}
 
 	/*
 	 * If the guest/host SPEC_CTRL values differ, restore the host value.
@@ -8598,6 +8638,24 @@ static __init int hardware_setup(void)
 	kvm_caps.tsc_scaling_ratio_frac_bits = 48;
 	kvm_caps.has_bus_lock_exit = cpu_has_vmx_bus_lock_detection();
 	kvm_caps.has_notify_vmexit = cpu_has_notify_vmexit();
+	kvm_caps.supported_force_spec_ctrl = 0;
+
+	if (cpu_has_spec_ctrl_shadow()) {
+		kvm_caps.supported_force_spec_ctrl |= SPEC_CTRL_IBRS;
+
+		if (boot_cpu_has(X86_FEATURE_STIBP))
+			kvm_caps.supported_force_spec_ctrl |= SPEC_CTRL_STIBP;
+
+		if (boot_cpu_has(X86_FEATURE_SSBD))
+			kvm_caps.supported_force_spec_ctrl |= SPEC_CTRL_SSBD;
+
+		if (boot_cpu_has(X86_FEATURE_RRSBA_CTRL) &&
+		    (host_arch_capabilities & ARCH_CAP_RRSBA))
+			kvm_caps.supported_force_spec_ctrl |= SPEC_CTRL_RRSBA_DIS_S;
+
+		if (boot_cpu_has(X86_FEATURE_BHI_CTRL))
+			kvm_caps.supported_force_spec_ctrl |= SPEC_CTRL_BHI_DIS_S;
+	}
 
 	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
 
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 65786dbe7d60..f26ac82b5a59 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -578,7 +578,8 @@ static inline u8 vmx_get_rvi(void)
 
 #define KVM_REQUIRED_VMX_TERTIARY_VM_EXEC_CONTROL 0
 #define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL			\
-	(TERTIARY_EXEC_IPI_VIRT)
+	(TERTIARY_EXEC_IPI_VIRT |					\
+	 TERTIARY_EXEC_SPEC_CTRL_SHADOW)
 
 #define BUILD_CONTROLS_SHADOW(lname, uname, bits)						\
 static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val)			\
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 984ea2089efc..9a59b5a93d0e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4836,6 +4836,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		if (kvm_is_vm_type_supported(KVM_X86_SW_PROTECTED_VM))
 			r |= BIT(KVM_X86_SW_PROTECTED_VM);
 		break;
+	case KVM_CAP_FORCE_SPEC_CTRL:
+		r = !!kvm_caps.supported_force_spec_ctrl;
+		break;
 	default:
 		break;
 	}
@@ -4990,6 +4993,13 @@ long kvm_arch_dev_ioctl(struct file *filp,
 		r = kvm_x86_dev_has_attr(&attr);
 		break;
 	}
+	case KVM_GET_SUPPORTED_FORCE_SPEC_CTRL: {
+		r = 0;
+		if (copy_to_user(argp, &kvm_caps.supported_force_spec_ctrl,
+				 sizeof(kvm_caps.supported_force_spec_ctrl)))
+			r = -EFAULT;
+		break;
+	}
 	default:
 		r = -EINVAL;
 		break;
@@ -6729,6 +6739,26 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		}
 		mutex_unlock(&kvm->lock);
 		break;
+	case KVM_CAP_FORCE_SPEC_CTRL:
+		r = -EINVAL;
+
+		mutex_lock(&kvm->lock);
+
+		/*
+		 * Note, only the value is restricted to known bits that KVM
+		 * can force on.  Userspace is allowed to set any mask bits,
+		 * i.e. can prevent the guest from setting a bit, even if KVM
+		 * doesn't support the bit.
+		 */
+		if (kvm_caps.supported_force_spec_ctrl && !kvm->created_vcpus &&
+		    !(~kvm_caps.supported_force_spec_ctrl & cap->args[1]) &&
+		    !(~cap->args[0] & cap->args[1])) {
+			kvm->arch.force_spec_ctrl_mask = cap->args[0];
+			kvm->arch.force_spec_ctrl_value = cap->args[1];
+			r = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		break;
 	default:
 		r = -EINVAL;
 		break;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index a8b71803777b..6dd12776b310 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -29,6 +29,7 @@ struct kvm_caps {
 	u64 supported_xcr0;
 	u64 supported_xss;
 	u64 supported_perf_cap;
+	u64 supported_force_spec_ctrl;
 };
 
 void kvm_spurious_fault(void);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 2190adbe3002..fb918bdb930c 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -917,6 +917,7 @@ struct kvm_enable_cap {
 #define KVM_CAP_MEMORY_ATTRIBUTES 233
 #define KVM_CAP_GUEST_MEMFD 234
 #define KVM_CAP_VM_TYPES 235
+#define KVM_CAP_FORCE_SPEC_CTRL 236
 
 struct kvm_irq_routing_irqchip {
 	__u32 irqchip;
@@ -1243,6 +1244,9 @@ struct kvm_vfio_spapr_tce {
 #define KVM_GET_DEVICE_ATTR	  _IOW(KVMIO,  0xe2, struct kvm_device_attr)
 #define KVM_HAS_DEVICE_ATTR	  _IOW(KVMIO,  0xe3, struct kvm_device_attr)
 
+/* Available with KVM_CAP_FORCE_SPEC_CTRL */
+#define KVM_GET_SUPPORTED_FORCE_SPEC_CTRL _IOR(KVMIO,  0xe4, __u64)
+
 /*
  * ioctls for vcpu fds
  */
-- 
2.39.3





[Index of Archives]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux