[PATCH RFC] KVM: x86: nested: support for MSR loading/storing

The BitVisor hypervisor fails to start a nested guest because KVM does
not implement the VM-entry MSR-load area or the VM-exit MSR-store and
MSR-load areas for nested guests.

Implement this MSR switching according to the Intel SDM: validate the
address, alignment and size of each area against the guest's physical
address width, load/store the listed MSRs on nested VM entry and VM
exit, fail the nested VM entry with exit reason 34 (VM-entry failure
due to MSR loading) when an entry MSR cannot be loaded, and raise a
VMX abort when an MSR store or load fails during VM exit.
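For reference, each entry in an MSR-load/store area is a 16-byte record
and the areas are programmed by the L1 hypervisor through ordinary
VMWRITEs. A minimal sketch of the guest-side setup, assuming
vmcs_write32()/vmcs_write64() VMWRITE wrappers like the ones in vmx.c
and a hypothetical program_entry_msr_load() helper (field encodings are
from the Intel SDM, Appendix B):

	struct vmx_msr_area_entry {
		u32 index;	/* MSR number */
		u32 reserved;	/* must be zero */
		u64 data;	/* value to load, or slot the CPU stores into */
	} __packed;

	/* VMCS field encodings for the VM-entry MSR-load area */
	#define VM_ENTRY_MSR_LOAD_ADDR	0x0000200a
	#define VM_ENTRY_MSR_LOAD_COUNT	0x00004014

	/* The area must be 16-byte aligned and below the physical
	 * address width; those are exactly the checks this patch
	 * performs on the vmcs12 fields. */
	static struct vmx_msr_area_entry area[1] __aligned(16);

	static void program_entry_msr_load(u64 area_pa)
	{
		area[0].index = MSR_STAR;	/* 0xc0000081, as an example */
		area[0].reserved = 0;
		area[0].data = 0;
		vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, area_pa);
		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 1);
	}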


Signed-off-by: Eugene Korenevsky <ekorenevsky@xxxxxxxxx>
---
 arch/x86/include/asm/vmx.h            |   6 ++
 arch/x86/include/uapi/asm/msr-index.h |   3 +
 arch/x86/include/uapi/asm/vmx.h       |   1 +
 arch/x86/kvm/vmx.c                    | 193 +++++++++++++++++++++++++++++++++-
 virt/kvm/kvm_main.c                   |   1 +
 5 files changed, 199 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index bcbfade..e07414c 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -454,6 +454,12 @@ struct vmx_msr_entry {
 #define ENTRY_FAIL_VMCS_LINK_PTR	4
 
 /*
+ * VMX Abort codes
+ */
+#define VMX_ABORT_MSR_STORE		1
+#define VMX_ABORT_MSR_LOAD		4
+
+/*
  * VM-instruction error numbers
  */
 enum vm_instruction_error_number {
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index e21331c..3c9c601 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -316,6 +316,9 @@
 #define MSR_IA32_UCODE_WRITE		0x00000079
 #define MSR_IA32_UCODE_REV		0x0000008b
 
+#define MSR_IA32_SMM_MONITOR_CTL	0x0000009b
+#define MSR_IA32_SMBASE			0x0000009e
+
 #define MSR_IA32_PERF_STATUS		0x00000198
 #define MSR_IA32_PERF_CTL		0x00000199
 #define MSR_AMD_PSTATE_DEF_BASE		0xc0010064
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 990a2fe..f5f8a62 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -56,6 +56,7 @@
 #define EXIT_REASON_MSR_READ            31
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_INVALID_STATE       33
+#define EXIT_REASON_MSR_LOAD_FAILURE    34
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
 #define EXIT_REASON_MONITOR_INSTRUCTION 39
 #define EXIT_REASON_PAUSE_INSTRUCTION   40
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6a951d8..deebc96 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8498,6 +8498,163 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
 }
 
+struct msr_entry {
+	u32 index;
+	u32 reserved;
+	u64 data;
+} __packed;
+
+static bool vmx_msr_switch_area_verify(u32 count, u64 addr, int maxphyaddr)
+{
+	if (count == 0)
+		return true;
+	if ((addr & 0xf) != 0)
+		return false;
+	if ((addr >> maxphyaddr) != 0)
+		return false;
+	if (((addr + count * sizeof(struct msr_entry) - 1) >> maxphyaddr) != 0)
+		return false;
+	return true;
+}
+
+static bool nested_vmx_msr_switch_verify(struct kvm_vcpu *vcpu,
+					 struct vmcs12 *vmcs12)
+{
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+#define VMCS12_MSR_SWITCH_VERIFY(name) do { \
+	if (!vmx_msr_switch_area_verify(vmcs12->name##_count, \
+					vmcs12->name##_addr, maxphyaddr)) { \
+		pr_warn_ratelimited( \
+			"%s: invalid MSR switch area address or count\n", \
+			#name); \
+		return false; \
+	} \
+} while (0)
+	VMCS12_MSR_SWITCH_VERIFY(vm_exit_msr_load);
+	VMCS12_MSR_SWITCH_VERIFY(vm_exit_msr_store);
+	VMCS12_MSR_SWITCH_VERIFY(vm_entry_msr_load);
+	return true;
+}
+
+static bool vmx_load_msr_entry_verify(struct kvm_vcpu *vcpu,
+				      struct msr_entry *e)
+{
+	if (e->index == MSR_FS_BASE || e->index == MSR_GS_BASE)
+		return false;
+	/* SMM is not supported */
+	if (e->index == MSR_IA32_SMM_MONITOR_CTL)
+		return false;
+	/* x2APIC MSR accesses are not allowed */
+	if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 8) == 0x8)
+		return false;
+	if (e->reserved != 0)
+		return false;
+	return true;
+}
+
+static bool vmx_msr_switch_is_protected_msr(u32 msr_index)
+{
+	/* Intel SDM: a processor MAY prevent writing to these MSRs when
+	 * loading guest state on VM entry or saving guest state on VM
+	 * exit. Assume our emulated processor DOES prevent writing.
+	 */
+	return msr_index == MSR_IA32_UCODE_WRITE ||
+	       msr_index == MSR_IA32_UCODE_REV;
+}
+
+static unsigned int nested_vmx_load_msrs(struct kvm_vcpu *vcpu,
+					 u32 count, u64 addr)
+{
+	unsigned int i;
+	struct msr_entry msr_entry;
+	struct msr_data msr;
+
+	for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
+		if (kvm_read_guest(vcpu->kvm, addr,
+				   &msr_entry, sizeof(msr_entry))) {
+			pr_warn_ratelimited(
+				"Load MSR %d: cannot read GPA: 0x%llx\n",
+				i, addr);
+			return i;
+		}
+		if (!vmx_load_msr_entry_verify(vcpu, &msr_entry)) {
+			pr_warn_ratelimited(
+			       "Load MSR %d: invalid MSR entry 0x%x 0x%x\n",
+			       i, msr_entry.index, msr_entry.reserved);
+			return i;
+		}
+		msr.host_initiated = false;
+		msr.index = msr_entry.index;
+		msr.data = msr_entry.data;
+		if (vmx_msr_switch_is_protected_msr(msr.index)) {
+			pr_warn_ratelimited(
+				"Load MSR %d: prevent writing to MSR 0x%x\n",
+				i, msr.index);
+			return i;
+		}
+		if (kvm_set_msr(vcpu, &msr)) {
+			pr_warn_ratelimited(
+			       "Load MSR %d: cannot write 0x%llx to MSR 0x%x\n",
+			       i, msr.data, msr.index);
+			return i;
+		}
+	}
+	return 0;
+}
+
+static bool vmx_store_msr_entry_verify(struct kvm_vcpu *vcpu,
+				       struct msr_entry *e)
+{
+	/* x2APIC MSR accesses are not allowed */
+	if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 8) == 0x8)
+		return false;
+	/* SMM is not supported */
+	if (e->index == MSR_IA32_SMBASE)
+		return false;
+	if (e->reserved != 0)
+		return false;
+	return true;
+}
+
+static unsigned int nested_vmx_store_msrs(struct kvm_vcpu *vcpu,
+					  u32 count, u64 addr)
+{
+	unsigned int i;
+	struct msr_entry msr_entry;
+
+	for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
+		if (kvm_read_guest(vcpu->kvm, addr,
+				   &msr_entry, 2 * sizeof(u32))) {
+			pr_warn_ratelimited(
+				"Store MSR %d: cannot read GPA: 0x%llx\n",
+				i, addr);
+			return i;
+		}
+		if (!vmx_store_msr_entry_verify(vcpu, &msr_entry)) {
+			pr_warn_ratelimited(
+			       "Store MSR %d: invalid MSR entry 0x%x 0x%x\n",
+			       i, msr_entry.index, msr_entry.reserved);
+			return i;
+		}
+		if (vmx_get_msr(vcpu, msr_entry.index, &msr_entry.data)) {
+			pr_warn_ratelimited(
+				"Store MSR %d: cannot read MSR 0x%x\n",
+				i, msr_entry.index);
+			return i;
+		}
+		if (kvm_write_guest(vcpu->kvm,
+				    addr + offsetof(struct msr_entry, data),
+				    &msr_entry.data, sizeof(msr_entry.data))) {
+			pr_warn_ratelimited(
+				"Store MSR %d: cannot write to GPA: 0x%llx\n",
+				i, addr);
+			return i;
+		}
+	}
+	return 0;
+}
+
 /*
  * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
  * for running an L2 nested guest.
@@ -8507,6 +8664,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	struct vmcs12 *vmcs12;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int cpu;
+	unsigned int msr;
 	struct loaded_vmcs *vmcs02;
 	bool ia32e;
 
@@ -8556,11 +8714,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		return 1;
 	}
 
-	if (vmcs12->vm_entry_msr_load_count > 0 ||
-	    vmcs12->vm_exit_msr_load_count > 0 ||
-	    vmcs12->vm_exit_msr_store_count > 0) {
-		pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n",
-				    __func__);
+	if (!nested_vmx_msr_switch_verify(vcpu, vmcs12)) {
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
 	}
@@ -8670,6 +8824,14 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
 	prepare_vmcs02(vcpu, vmcs12);
 
+	msr = nested_vmx_load_msrs(vcpu, vmcs12->vm_entry_msr_load_count,
+				   vmcs12->vm_entry_msr_load_addr);
+	if (msr) {
+		nested_vmx_entry_failure(vcpu, vmcs12,
+					 EXIT_REASON_MSR_LOAD_FAILURE, msr);
+		return 1;
+	}
+
 	if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
 		return kvm_emulate_halt(vcpu);
 
@@ -9099,6 +9261,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
 }
 
+static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 abort_code)
+{
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+	vmcs12->abort = abort_code;
+	pr_warn("Nested VMX abort %u\n", abort_code);
+	kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+}
+
 /*
  * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
  * and modify vmcs12 to make it see what it would expect to see there if
@@ -9118,8 +9289,20 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
 		       exit_qualification);
 
+	if (exit_reason != EXIT_REASON_INVALID_STATE &&
+	    exit_reason != EXIT_REASON_MSR_LOAD_FAILURE) {
+		if (nested_vmx_store_msrs(vcpu,
+					  vmcs12->vm_exit_msr_store_count,
+					  vmcs12->vm_exit_msr_store_addr))
+			nested_vmx_abort(vcpu, VMX_ABORT_MSR_STORE);
+	}
+
 	vmx_load_vmcs01(vcpu);
 
+	if (nested_vmx_load_msrs(vcpu, vmcs12->vm_exit_msr_load_count,
+				 vmcs12->vm_exit_msr_load_addr))
+		nested_vmx_abort(vcpu, VMX_ABORT_MSR_LOAD);
+
 	if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
 	    && nested_exit_intr_ack_set(vcpu)) {
 		int irq = kvm_cpu_get_interrupt(vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5b45330..c9d1c4a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1582,6 +1582,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(kvm_write_guest);
 
 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 			      gpa_t gpa, unsigned long len)
-- 
2.0.4
