[RFC PATCH part-7 12/12] pkvm: x86: Use page state API in shadow EPT for normal VM

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add map_leaf & free_leaf override helper functions for shadow EPT, and
use the page state API in these helper functions to support shadow EPT
invalidation, destruction and EPT violation handling for normal VMs.

When mapping a page for a normal VM in shadow EPT, use the share API to
mark that the page — previously owned exclusively by the host VM — is
now shared between the host VM and the normal VM. When invalidating or
destroying the shadow EPT, mark the page as unshared, meaning it is
owned by the host VM alone again.

Under the state machine of page state transitions, pKVM does not support
mapping multiple guest pages to the same host page. This conflicts with
KSM, so simply disable KSM under the pKVM Kconfig.

Signed-off-by: Chuanxiao Dong <chuanxiao.dong@xxxxxxxxx>
Signed-off-by: Jason Chen CJ <jason.cj.chen@xxxxxxxxx>
---
 arch/x86/kvm/Kconfig            |  1 +
 arch/x86/kvm/vmx/pkvm/hyp/ept.c | 60 +++++++++++++++++++++++++++++++--
 arch/x86/kvm/vmx/pkvm/hyp/ept.h |  1 +
 3 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index c2f66d3eef37..3eb7a2624245 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -91,6 +91,7 @@ config PKVM_INTEL
 	bool "pKVM for Intel processors support"
 	depends on KVM_INTEL=y
 	depends on X86_64
+	depends on !KSM
 	help
 	  Provides support for pKVM on Intel processors.
 
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/ept.c b/arch/x86/kvm/vmx/pkvm/hyp/ept.c
index 9e5aeb8b239e..f942e2e7f3d8 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/ept.c
+++ b/arch/x86/kvm/vmx/pkvm/hyp/ept.c
@@ -317,6 +317,58 @@ static struct pkvm_mm_ops shadow_ept_mm_ops = {
 	.flush_tlb = flush_tlb_noop,
 };
 
+static int pkvm_shadow_ept_map_leaf(struct pkvm_pgtable *pgt, unsigned long vaddr, int level,
+				    void *ptep, struct pgt_flush_data *flush_data, void *arg)
+{
+	struct pkvm_pgtable_map_data *data = arg;
+	struct pkvm_pgtable_ops *pgt_ops = pgt->pgt_ops;
+	unsigned long level_size = pgt_ops->pgt_level_to_size(level);
+	unsigned long map_phys = data->phys & PAGE_MASK;
+	int ret;
+
+	/*
+	 * It is possible that another CPU just created the same mapping
+	 * when multiple EPT violations happen on different CPUs.
+	 */
+	if (!pgt_ops->pgt_entry_present(ptep)) {
+		ret = __pkvm_host_share_guest(map_phys, pgt, vaddr, level_size, data->prot);
+		if (ret)
+			return ret;
+	}
+
+	/* Increase the physical address for the next mapping */
+	data->phys += level_size;
+
+	return 0;
+}
+
+static int pkvm_shadow_ept_free_leaf(struct pkvm_pgtable *pgt, unsigned long vaddr, int level,
+				     void *ptep, struct pgt_flush_data *flush_data, void *arg)
+{
+	unsigned long phys = pgt->pgt_ops->pgt_entry_to_phys(ptep);
+	unsigned long size = pgt->pgt_ops->pgt_level_to_size(level);
+
+	if (pgt->pgt_ops->pgt_entry_present(ptep)) {
+		int ret;
+
+		/*
+		 * The pgtable_free_cb of the current page walker is still walking
+		 * the shadow EPT, so __pkvm_host_unshare_guest() must not be
+		 * allowed to release shadow EPT table pages here.
+		 *
+		 * The table pages will be freed later by the pgtable_free_cb itself.
+		 */
+		pgt->mm_ops->get_page(ptep);
+		ret = __pkvm_host_unshare_guest(phys, pgt, vaddr, size);
+		pgt->mm_ops->put_page(ptep);
+		flush_data->flushtlb |= true;
+
+		return ret;
+	}
+
+	return 0;
+}
+
 void pkvm_invalidate_shadow_ept(struct shadow_ept_desc *desc)
 {
 	struct pkvm_shadow_vm *vm = sept_desc_to_shadow_vm(desc);
@@ -328,7 +380,7 @@ void pkvm_invalidate_shadow_ept(struct shadow_ept_desc *desc)
 	if (!is_valid_eptp(desc->shadow_eptp))
 		goto out;
 
-	pkvm_pgtable_unmap(sept, 0, size, NULL);
+	pkvm_pgtable_unmap(sept, 0, size, pkvm_shadow_ept_free_leaf);
 
 	flush_ept(desc->shadow_eptp);
 out:
@@ -343,7 +395,7 @@ void pkvm_shadow_ept_deinit(struct shadow_ept_desc *desc)
 	pkvm_spin_lock(&vm->lock);
 
 	if (desc->shadow_eptp) {
-		pkvm_pgtable_destroy(sept, NULL);
+		pkvm_pgtable_destroy(sept, pkvm_shadow_ept_free_leaf);
 
 		flush_ept(desc->shadow_eptp);
 
@@ -459,8 +511,10 @@ pkvm_handle_shadow_ept_violation(struct shadow_vcpu_state *shadow_vcpu, u64 l2_g
 		unsigned long level_size = pgt_ops->pgt_level_to_size(level);
 		unsigned long gpa = ALIGN_DOWN(l2_gpa, level_size);
 		unsigned long hpa = ALIGN_DOWN(host_gpa2hpa(phys), level_size);
+		u64 prot = gprot & EPT_PROT_MASK;
 
-		if (!pkvm_pgtable_map(sept, gpa, hpa, level_size, 0, gprot, NULL))
+		if (!pkvm_pgtable_map(sept, gpa, hpa, level_size, 0,
+					prot, pkvm_shadow_ept_map_leaf))
 			ret = PKVM_HANDLED;
 	}
 out:
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/ept.h b/arch/x86/kvm/vmx/pkvm/hyp/ept.h
index 9d7d2c2f9be3..2ad2fab4a88d 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/ept.h
+++ b/arch/x86/kvm/vmx/pkvm/hyp/ept.h
@@ -11,6 +11,7 @@
 				(MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT))
 #define HOST_EPT_DEF_MMIO_PROT	(VMX_EPT_RWX_MASK |				\
 				(MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT))
+#define EPT_PROT_MASK		(VMX_EPT_RWX_MASK | VMX_EPT_MT_MASK | VMX_EPT_IPAT_BIT)
 
 enum sept_handle_ret {
 	PKVM_NOT_HANDLED,
-- 
2.25.1




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux