Several hypervisors use MSR loading/storing on VM entry and VM exit to run
guests. This patch implements emulation of this feature and allows these
hypervisors to work as L1 guests.

The following is emulated:
- loading MSRs on VM-entry
- storing MSRs on VM-exit
- loading MSRs on VM-exit

Actions taken when loading MSRs:
- the MSR load area is verified
- for each MSR entry in the load area:
  - the MSR load entry is read and verified
  - the MSR value is safely written

Actions taken when storing MSRs:
- the MSR store area is verified
- for each MSR entry in the store area:
  - the MSR entry is read and verified
  - the MSR value is safely read using the MSR index from the entry
  - the MSR value is written back into the entry

The code performs the checks required by the Intel Software Developer's
Manual.

This patch is partially based on Wincy Van's work.

Signed-off-by: Eugene Korenevsky <ekorenevsky@xxxxxxxxx>
---
 arch/x86/include/asm/vmx.h            |   6 +
 arch/x86/include/uapi/asm/msr-index.h |   3 +
 arch/x86/include/uapi/asm/vmx.h       |   2 +
 arch/x86/kvm/vmx.c                    | 210 ++++++++++++++++++++++++++++++++--
 virt/kvm/kvm_main.c                   |   1 +
 5 files changed, 215 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 45afaee..8bdb247 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -457,6 +457,12 @@ struct vmx_msr_entry {
 #define ENTRY_FAIL_VMCS_LINK_PTR        4
 
 /*
+ * VMX Abort codes
+ */
+#define VMX_ABORT_MSR_STORE_FAILURE     1
+#define VMX_ABORT_MSR_LOAD_FAILURE      4
+
+/*
  * VM-instruction error numbers
  */
 enum vm_instruction_error_number {
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index e21331c..3c9c601 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -316,6 +316,9 @@
 #define MSR_IA32_UCODE_WRITE            0x00000079
 #define MSR_IA32_UCODE_REV              0x0000008b
 
+#define MSR_IA32_SMM_MONITOR_CTL        0x0000009b
+#define MSR_IA32_SMBASE                 0x0000009e
+
 #define MSR_IA32_PERF_STATUS            0x00000198
 #define MSR_IA32_PERF_CTL               0x00000199
 #define MSR_AMD_PSTATE_DEF_BASE         0xc0010064
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index b813bf9..52ad8e2 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -56,6 +56,7 @@
 #define EXIT_REASON_MSR_READ            31
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_INVALID_STATE       33
+#define EXIT_REASON_MSR_LOAD_FAILURE    34
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
 #define EXIT_REASON_MONITOR_INSTRUCTION 39
 #define EXIT_REASON_PAUSE_INSTRUCTION   40
@@ -116,6 +117,7 @@
        { EXIT_REASON_APIC_WRITE,            "APIC_WRITE" }, \
        { EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
        { EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
+       { EXIT_REASON_MSR_LOAD_FAILURE,      "MSR_LOAD_FAILURE" }, \
        { EXIT_REASON_INVD,                  "INVD" }, \
        { EXIT_REASON_INVVPID,               "INVVPID" }, \
        { EXIT_REASON_INVPCID,               "INVPCID" }, \
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9bcc871..86dc7db 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8571,6 +8571,168 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
 }
 
+static bool vmx_msr_switch_area_verify(struct kvm_vcpu *vcpu,
+                                       unsigned long count_field,
+                                       unsigned long addr_field,
+                                       int maxphyaddr)
+{
+        u64 count, addr;
+
+        BUG_ON(vmcs12_read_any(vcpu, count_field, &count));
+        BUG_ON(vmcs12_read_any(vcpu, addr_field, &addr));
+        if (!IS_ALIGNED(addr, 16))
+                goto fail;
+        if (addr >> maxphyaddr)
+                goto fail;
+        if ((addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr)
+                goto fail;
+        return true;
+fail:
+        pr_warn_ratelimited(
+                "nVMX: invalid MSR switch (0x%lx, 0x%lx, %d, %llu, 0x%08llx)",
+                count_field, addr_field, maxphyaddr, count, addr);
+        return false;
+}
+
+static bool nested_vmx_msr_switch_verify(struct kvm_vcpu *vcpu,
+                                         struct vmcs12 *vmcs12)
+{
+        int maxphyaddr;
+
+        if (vmcs12->vm_exit_msr_load_count == 0 &&
+            vmcs12->vm_exit_msr_store_count == 0 &&
+            vmcs12->vm_entry_msr_load_count == 0)
+                return true; /* Fast path */
+        maxphyaddr = cpuid_maxphyaddr(vcpu);
+        return vmx_msr_switch_area_verify(vcpu, VM_EXIT_MSR_LOAD_COUNT,
+                                          VM_EXIT_MSR_LOAD_ADDR, maxphyaddr) &&
+               vmx_msr_switch_area_verify(vcpu, VM_EXIT_MSR_STORE_COUNT,
+                                          VM_EXIT_MSR_STORE_ADDR,
+                                          maxphyaddr) &&
+               vmx_msr_switch_area_verify(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
+                                          VM_ENTRY_MSR_LOAD_ADDR, maxphyaddr);
+}
+
+static bool vmx_load_msr_entry_verify(struct kvm_vcpu *vcpu,
+                                      struct vmx_msr_entry *e)
+{
+        if (e->index == MSR_FS_BASE || e->index == MSR_GS_BASE)
+                return false;
+        /* SMM is not supported */
+        if (e->index == MSR_IA32_SMM_MONITOR_CTL)
+                return false;
+        /* x2APIC MSR accesses are not allowed */
+        if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 8) == 0x8)
+                return false;
+        if (e->reserved != 0)
+                return false;
+        return true;
+}
+
+static bool vmx_msr_switch_is_protected_msr(u32 msr_index)
+{
+        /* Intel SDM: a processor MAY prevent writing to these MSRs when
+         * loading guest states on VM entries or saving guest states on VM
+         * exits.
+         * Assume our emulated processor DOES prevent writing */
+        return msr_index == MSR_IA32_UCODE_WRITE ||
+               msr_index == MSR_IA32_UCODE_REV;
+}
+
+static unsigned int nested_vmx_load_msrs(struct kvm_vcpu *vcpu,
+                                         u32 count, u64 addr)
+{
+        unsigned int i;
+        struct vmx_msr_entry msr_entry;
+        struct msr_data msr;
+
+        for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
+                if (kvm_read_guest(vcpu->kvm, addr,
+                                   &msr_entry, sizeof(msr_entry))) {
+                        pr_warn_ratelimited(
+                                "%s MSR %u: cannot read GPA: 0x%llx\n",
+                                __func__, i, addr);
+                        return i;
+                }
+                if (!vmx_load_msr_entry_verify(vcpu, &msr_entry)) {
+                        pr_warn_ratelimited(
+                                "%s MSR %u: invalid MSR entry 0x%x 0x%x\n",
+                                __func__, i, msr_entry.index,
+                                msr_entry.reserved);
+                        return i;
+                }
+                msr.host_initiated = false;
+                msr.index = msr_entry.index;
+                msr.data = msr_entry.value;
+                if (vmx_msr_switch_is_protected_msr(msr.index)) {
+                        pr_warn_ratelimited(
+                                "%s MSR %u: prevent writing to MSR 0x%x\n",
+                                __func__, i, msr.index);
+                        return i;
+                }
+                if (kvm_set_msr(vcpu, &msr)) {
+                        pr_warn_ratelimited(
+                                "%s MSR %u: cannot write 0x%llx to MSR 0x%x\n",
+                                __func__, i, msr.data, msr.index);
+                        return i;
+                }
+        }
+        return 0;
+}
+
+static bool vmx_store_msr_entry_verify(struct kvm_vcpu *vcpu,
+                                       struct vmx_msr_entry *e)
+{
+        /* x2APIC MSR accesses are not allowed */
+        if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 8) == 0x8)
+                return false;
+        /* SMM is not supported */
+        if (e->index == MSR_IA32_SMBASE)
+                return false;
+        if (e->reserved != 0)
+                return false;
+        return true;
+}
+
+static unsigned int nested_vmx_store_msrs(struct kvm_vcpu *vcpu,
+                                          u32 count, u64 addr)
+{
+        unsigned int i;
+        struct vmx_msr_entry msr_entry;
+
+        for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
+                if (kvm_read_guest(vcpu->kvm, addr,
+                                   &msr_entry, 2 * sizeof(u32))) {
+                        pr_warn_ratelimited(
+                                "%s MSR %u: cannot read GPA: 0x%llx\n",
+                                __func__, i, addr);
+                        return i;
+                }
+                if (!vmx_store_msr_entry_verify(vcpu, &msr_entry)) {
+                        pr_warn_ratelimited(
+                                "%s MSR %u: invalid MSR entry 0x%x 0x%x\n",
+                                __func__, i, msr_entry.index,
+                                msr_entry.reserved);
+                        return i;
+                }
+                if (vmx_get_msr(vcpu, msr_entry.index, &msr_entry.value)) {
+                        pr_warn_ratelimited(
+                                "%s MSR %u: cannot read MSR 0x%x\n",
+                                __func__, i, msr_entry.index);
+                        return i;
+                }
+                if (kvm_write_guest(vcpu->kvm,
+                                addr + offsetof(struct vmx_msr_entry, value),
+                                &msr_entry.value, sizeof(msr_entry.value))) {
+                        pr_warn_ratelimited(
+                                "%s MSR %u: cannot write to GPA: 0x%llx\n",
+                                __func__, i, addr);
+                        return i;
+                }
+        }
+        return 0;
+}
+
 /*
  * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
  * for running an L2 nested guest.
@@ -8580,6 +8742,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
        struct vmcs12 *vmcs12;
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        int cpu;
+       unsigned int msr;
        struct loaded_vmcs *vmcs02;
        bool ia32e;
 
@@ -8629,11 +8792,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                return 1;
        }
 
-       if (vmcs12->vm_entry_msr_load_count > 0 ||
-           vmcs12->vm_exit_msr_load_count > 0 ||
-           vmcs12->vm_exit_msr_store_count > 0) {
-               pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n",
-                                   __func__);
+       if (!nested_vmx_msr_switch_verify(vcpu, vmcs12)) {
                nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
                return 1;
        }
@@ -8739,10 +8898,20 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
        vmx_segment_cache_clear(vmx);
 
-       vmcs12->launch_state = 1;
-
        prepare_vmcs02(vcpu, vmcs12);
 
+       msr = nested_vmx_load_msrs(vcpu, vmcs12->vm_entry_msr_load_count,
+                                  vmcs12->vm_entry_msr_load_addr);
+       if (msr) {
+               leave_guest_mode(vcpu);
+               vmx_load_vmcs01(vcpu);
+               nested_vmx_entry_failure(vcpu, vmcs12,
+                                        EXIT_REASON_MSR_LOAD_FAILURE, msr);
+               return 1;
+       }
+
+       vmcs12->launch_state = 1;
+
        if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
                return kvm_emulate_halt(vcpu);
 
@@ -9040,6 +9209,15 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
        kvm_clear_interrupt_queue(vcpu);
 }
 
+static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 abort_code)
+{
+       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+       vmcs12->abort = abort_code;
+       pr_warn("Nested VMX abort %u\n", abort_code);
+       kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+}
+
 /*
  * A part of what we need to when the nested L2 guest exits and we want to
  * run its L1 parent, is to reset L1's guest state to the host state specified
@@ -9172,6 +9350,19 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
        kvm_set_dr(vcpu, 7, 0x400);
        vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+
+       if (nested_vmx_load_msrs(vcpu, vmcs12->vm_exit_msr_load_count,
+                                vmcs12->vm_exit_msr_load_addr))
+               nested_vmx_abort(vcpu, VMX_ABORT_MSR_LOAD_FAILURE);
+}
+
+static bool vmx_is_entry_failure(u32 exit_reason)
+{
+       if ((exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) == 0)
+               return false;
+       exit_reason &= ~VMX_EXIT_REASONS_FAILED_VMENTRY;
+       return exit_reason == EXIT_REASON_INVALID_STATE ||
+              exit_reason == EXIT_REASON_MSR_LOAD_FAILURE;
 }
 
 /*
@@ -9193,6 +9384,11 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
        prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
                       exit_qualification);
 
+       if (nested_vmx_store_msrs(vcpu,
+                                 vmcs12->vm_exit_msr_store_count,
+                                 vmcs12->vm_exit_msr_store_addr))
+               nested_vmx_abort(vcpu, VMX_ABORT_MSR_STORE_FAILURE);
+
        vmx_load_vmcs01(vcpu);
 
        if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c5c186a..77179c5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1579,6 +1579,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
        }
        return 0;
 }
+EXPORT_SYMBOL_GPL(kvm_write_guest);
 
 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
                              gpa_t gpa, unsigned long len)
-- 
2.0.4
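For readers unfamiliar with the VMX MSR-switch mechanism emulated above, here
is a minimal, self-contained user-space sketch of the 16-byte entry layout and
of the checks applied to a VM-entry MSR-load list. It is not KVM code and not
part of the patch; the struct and function names, the example MSR indices, the
guest-physical address and the maxphyaddr value are illustrative assumptions
that only loosely mirror vmx_msr_switch_area_verify() and
vmx_load_msr_entry_verify() in the patch.

/*
 * Illustrative sketch only -- NOT part of the patch and not KVM code.
 * It shows the 16-byte MSR-switch entry layout from the SDM and mimics,
 * in plain user-space C, the checks the patch applies to a VM-entry
 * MSR-load list.  All names and example values here are made up.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct msr_switch_entry {
        uint32_t index;         /* MSR number */
        uint32_t reserved;      /* must be zero */
        uint64_t value;         /* loaded on VM entry / stored on VM exit */
};

#define MSR_FS_BASE              0xc0000100u
#define MSR_GS_BASE              0xc0000101u
#define MSR_IA32_SMM_MONITOR_CTL 0x0000009bu

/* Rough analogue of vmx_msr_switch_area_verify(): the area must be
 * 16-byte aligned and must lie below the guest physical-address limit. */
static bool area_valid(uint64_t gpa, uint64_t count, int maxphyaddr)
{
        if (gpa & 0xf)
                return false;
        if (gpa >> maxphyaddr)
                return false;
        return ((gpa + count * sizeof(struct msr_switch_entry) - 1)
                >> maxphyaddr) == 0;
}

/* Rough analogue of vmx_load_msr_entry_verify() (x2APIC mode ignored). */
static bool load_entry_valid(const struct msr_switch_entry *e)
{
        if (e->index == MSR_FS_BASE || e->index == MSR_GS_BASE)
                return false;
        if (e->index == MSR_IA32_SMM_MONITOR_CTL)       /* SMM not supported */
                return false;
        if ((e->index >> 8) == 0x8)                     /* x2APIC MSR range */
                return false;
        return e->reserved == 0;
}

int main(void)
{
        /* A made-up two-entry VM-entry MSR-load list at a made-up GPA. */
        const struct msr_switch_entry area[] = {
                { .index = 0x00000199 /* IA32_PERF_CTL */, .value = 0 },
                { .index = MSR_FS_BASE, .value = 0 },   /* rejected on purpose */
        };
        const uint64_t gpa = 0x12340000;        /* 16-byte aligned */
        const int maxphyaddr = 36;              /* example; really from CPUID */
        size_t i, n = sizeof(area) / sizeof(area[0]);

        printf("area valid: %d\n", area_valid(gpa, n, maxphyaddr));
        for (i = 0; i < n; i++)
                printf("entry %zu valid: %d\n", i, load_entry_valid(&area[i]));
        return 0;
}

Built with any C compiler, the example reports the area as valid, accepts the
first entry and rejects the second, since FS_BASE must not appear in an
MSR-load list -- the same rule the patch enforces before calling kvm_set_msr().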