[PATCH v4 09/13] nEPT: Add nEPT violation/misconfigration support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Yang Zhang <yang.z.zhang@xxxxxxxxx>

Inject nEPT fault to L1 guest. This patch is original from Xinhao.

Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Xinhao Xu <xinhao.xu@xxxxxxxxx>
Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
Signed-off-by: Gleb Natapov <gleb@xxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |    4 ++++
 arch/x86/kvm/mmu.c              |   34 ++++++++++++++++++++++++++++++++++
 arch/x86/kvm/paging_tmpl.h      |   30 +++++++++++++++++++++++++++++-
 arch/x86/kvm/vmx.c              |   17 +++++++++++++++++
 4 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 531f47c..58a17c0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -286,6 +286,7 @@ struct kvm_mmu {
 	u64 *pae_root;
 	u64 *lm_root;
 	u64 rsvd_bits_mask[2][4];
+	u64 bad_mt_xwr;
 
 	/*
 	 * Bitmap: bit set = last pte in walk
@@ -512,6 +513,9 @@ struct kvm_vcpu_arch {
 	 * instruction.
 	 */
 	bool write_fault_to_shadow_pgtable;
+
+	/* set at EPT violation at this point */
+	unsigned long exit_qualification;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3df3ac3..58ae9db 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3521,6 +3521,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 	u64 exb_bit_rsvd = 0;
 
+	context->bad_mt_xwr = 0;
+
 	if (!context->nx)
 		exb_bit_rsvd = rsvd_bits(63, 63);
 	switch (context->root_level) {
@@ -3576,6 +3578,38 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	}
 }
 
+static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
+		struct kvm_mmu *context, bool execonly)
+{
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	int pte;
+
+	context->rsvd_bits_mask[0][3] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
+	context->rsvd_bits_mask[0][2] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
+	context->rsvd_bits_mask[0][1] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
+	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+
+	/* large page */
+	context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
+	context->rsvd_bits_mask[1][2] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
+	context->rsvd_bits_mask[1][1] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
+	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+	
+	for (pte = 0; pte < 64; pte++) {
+		int rwx_bits = pte & 7;
+		int mt = pte >> 3;
+		if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
+				rwx_bits == 0x2 || rwx_bits == 0x6 ||
+				(rwx_bits == 0x4 && !execonly))
+			context->bad_mt_xwr |= (1ull << pte);
+	}
+}
+
 static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 {
 	unsigned bit, byte, pfec;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 23a19a5..58d2f87 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -121,14 +121,23 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
 #endif
 }
 
+#if PTTYPE == PTTYPE_EPT
+#define CHECK_BAD_MT_XWR(G) mmu->bad_mt_xwr & (1ull << ((G) & 0x3f));
+#else
+#define CHECK_BAD_MT_XWR(G) 0;
+#endif
+
 static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
 {
 	int bit7;
 
 	bit7 = (gpte >> 7) & 1;
-	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
+	return ((gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0) ||
+		CHECK_BAD_MT_XWR(gpte);
 }
 
+#undef CHECK_BAD_MT_XWR
+
 static inline int FNAME(is_present_gpte)(unsigned long pte)
 {
 #if PTTYPE != PTTYPE_EPT
@@ -376,6 +385,25 @@ error:
 	walker->fault.vector = PF_VECTOR;
 	walker->fault.error_code_valid = true;
 	walker->fault.error_code = errcode;
+
+#if PTTYPE == PTTYPE_EPT
+	/*
+	 * Use PFERR_RSVD_MASK in erorr_code to to tell if EPT
+	 * misconfiguration requires to be injected. The detection is
+	 * done by is_rsvd_bits_set() above.
+	 *
+	 * We set up the value of exit_qualification to inject:
+	 * [2:0] - Derive from [2:0] of real exit_qualification at EPT violation
+	 * [5:3] - Calculated by the page walk of the guest EPT page tables
+	 * [7:8] - Clear to 0.
+	 *
+	 * The other bits are set to 0.
+	 */
+	if (!(errcode & PFERR_RSVD_MASK)) {
+		vcpu->arch.exit_qualification &= 0x7;
+		vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;
+	}
+#endif
 	walker->fault.address = addr;
 	walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fc24370..bbfff8d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5321,6 +5321,8 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	/* ept page table is present? */
 	error_code |= (exit_qualification >> 3) & 0x1;
 
+	vcpu->arch.exit_qualification = exit_qualification;
+
 	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
 }
 
@@ -7416,6 +7418,21 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 		entry->ecx |= bit(X86_FEATURE_VMX);
 }
 
+static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
+		struct x86_exception *fault)
+{
+	struct vmcs12 *vmcs12;
+	nested_vmx_vmexit(vcpu);
+	vmcs12 = get_vmcs12(vcpu);
+
+	if (fault->error_code & PFERR_RSVD_MASK)
+		vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
+	else
+		vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
+	vmcs12->exit_qualification = vcpu->arch.exit_qualification;
+	vmcs12->guest_physical_address = fault->address;
+}
+
 /*
  * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
  * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux