[PATCH 08/10] nEPT: Nested INVEPT

If we let L1 use EPT, we should also support the INVEPT instruction: a
hypervisor that enables EPT expects to be able to invalidate cached EPT
translations after modifying its EPT tables.

We emulate all three invalidation types. Since L1's EPT tables are shadowed
by EPT02 tables, which are what the processor actually uses, the requested
invalidation is performed on the shadow structures: for the single-context
and individual-address types, if L1's EPTP matches the last EPT02 used to
run L2, only that context is invalidated; in all other cases we
conservatively fall back to a global invalidation, which is always correct,
if slower than strictly necessary.

Signed-off-by: Nadav Har'El <nyh@xxxxxxxxxx>
---
 arch/x86/include/asm/vmx.h |    2 
 arch/x86/kvm/vmx.c         |  124 +++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+)

--- .before/arch/x86/include/asm/vmx.h	2011-11-10 11:33:59.000000000 +0200
+++ .after/arch/x86/include/asm/vmx.h	2011-11-10 11:33:59.000000000 +0200
@@ -279,6 +279,7 @@ enum vmcs_field {
 #define EXIT_REASON_APIC_ACCESS         44
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
+#define EXIT_REASON_INVEPT		50
 #define EXIT_REASON_WBINVD		54
 #define EXIT_REASON_XSETBV		55
 
@@ -404,6 +405,7 @@ enum vmcs_field {
 #define VMX_EPTP_WB_BIT				(1ull << 14)
 #define VMX_EPT_2MB_PAGE_BIT			(1ull << 16)
 #define VMX_EPT_1GB_PAGE_BIT			(1ull << 17)
+#define VMX_EPT_INVEPT_BIT			(1ull << 20)
 #define VMX_EPT_EXTENT_INDIVIDUAL_BIT		(1ull << 24)
 #define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
--- .before/arch/x86/kvm/vmx.c	2011-11-10 11:33:59.000000000 +0200
+++ .after/arch/x86/kvm/vmx.c	2011-11-10 11:33:59.000000000 +0200
@@ -351,6 +351,8 @@ struct nested_vmx {
 	struct list_head vmcs02_pool;
 	int vmcs02_num;
 	u64 vmcs01_tsc_offset;
+	/* Remember last EPT02, for single-context INVEPT optimization */
+	u64 last_eptp02;
 	/* L2 must run next, and mustn't decide to exit to L1. */
 	bool nested_run_pending;
 	/*
@@ -1987,6 +1989,14 @@ static __init void nested_vmx_setup_ctls
 	/* ept capabilities */
 	if (nested_ept) {
 		nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT;
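+		/*
+		 * Advertise INVEPT and all three invalidation extents;
+		 * the mask below trims this to what hardware supports.
+		 */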
+		nested_vmx_ept_caps |=
+			VMX_EPT_INVEPT_BIT | VMX_EPT_EXTENT_GLOBAL_BIT |
+			VMX_EPT_EXTENT_CONTEXT_BIT |
+			VMX_EPT_EXTENT_INDIVIDUAL_BIT;
 		nested_vmx_ept_caps &= vmx_capability.ept;
 	} else
 		nested_vmx_ept_caps = 0;
@@ -5568,6 +5574,114 @@ static int handle_vmptrst(struct kvm_vcp
 	return 1;
 }
 
+/* Emulate the INVEPT instruction */
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
+	u32 vmx_instruction_info;
+	unsigned long type;
+	gva_t gva;
+	struct x86_exception e;
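+	/*
+	 * The INVEPT memory operand is 128 bits: the EPT pointer to
+	 * invalidate in the low 64 bits and, for the individual-address
+	 * type, a guest-physical address in the high 64 bits.
+	 */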
+	struct {
+		u64 eptp, gpa;
+	} operand;
+
+	if (!nested_ept || !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+
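+	/* INVEPT is not recognized (#UD) outside of protected mode. */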
+	if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	/* According to the Intel VMX instruction reference, the memory
+	 * operand is read even if it isn't needed (e.g., for type==global)
+	 */
+	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+			vmx_instruction_info, &gva))
+		return 1;
+	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+				sizeof(operand), &e)) {
+		kvm_inject_page_fault(vcpu, &e);
+		return 1;
+	}
+
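+	/*
+	 * The invalidation type is passed in a general-purpose register,
+	 * encoded in bits 31:28 of the VMX instruction-information field.
+	 */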
+	type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+	switch (type) {
+	case VMX_EPT_EXTENT_GLOBAL:
+		if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_GLOBAL_BIT)) {
+			nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		} else {
+			ept_sync_global();
+			nested_vmx_succeed(vcpu);
+		}
+		break;
+	case VMX_EPT_EXTENT_CONTEXT:
+		if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_CONTEXT_BIT)) {
+			nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		} else {
+			/*
+			 * We efficiently handle the common case, of L1
+			 * invalidating the last eptp it used to run L2.
+			 * TODO: Instead of saving one last_eptp02, look up
+			 * operand.eptp in the shadow EPT table cache, to
+			 * find its shadow. Then last_eptp02 won't be needed.
+			 */
+			struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+			struct vcpu_vmx *vmx = to_vmx(vcpu);
+			if (vmcs12 && nested_cpu_has_ept(vmcs12) &&
+			    (vmcs12->ept_pointer == operand.eptp) &&
+			    vmx->nested.last_eptp02)
+				ept_sync_context(vmx->nested.last_eptp02);
+			else
+				ept_sync_global();
+			nested_vmx_succeed(vcpu);
+		}
+		break;
+	case VMX_EPT_EXTENT_INDIVIDUAL_ADDR:
+		if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_INDIVIDUAL_BIT)) {
+			nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		} else {
+			struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+			struct vcpu_vmx *vmx = to_vmx(vcpu);
+			if (vmcs12 && nested_cpu_has_ept(vmcs12) &&
+			    (vmcs12->ept_pointer == operand.eptp) &&
+			    vmx->nested.last_eptp02)
+				ept_sync_individual_addr(
+					vmx->nested.last_eptp02, operand.gpa);
+			else
+				ept_sync_global();
+			nested_vmx_succeed(vcpu);
+		}
+		break;
+	default:
+		nested_vmx_failValid(vcpu,
+			VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+	}
+
+	skip_emulated_instruction(vcpu);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -5609,6 +5714,7 @@ static int (*kvm_vmx_exit_handlers[])(st
 	[EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
 	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_invalid_op,
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
+	[EXIT_REASON_INVEPT]                  = handle_invept,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -5793,6 +5899,7 @@ static bool nested_vmx_exit_handled(stru
 	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
 	case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
 	case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+	case EXIT_REASON_INVEPT:
 		/*
 		 * VMX instructions trap unconditionally. This allows L1 to
 		 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -7056,6 +7163,10 @@ void prepare_vmcs12(struct kvm_vcpu *vcp
 	/* clear vm-entry fields which are to be cleared on exit */
 	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
 		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
+
+	/* For single-context INVEPT optimization */
+	if (nested_cpu_has_ept(vmcs12))
+		to_vmx(vcpu)->nested.last_eptp02 = vmcs_read64(EPT_POINTER);
 }
 
 /*
--