[RFC PATCH part-3 20/22] pkvm: x86: Add pgtable API pkvm_pgtable_lookup

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add pkvm_pgtable_lookup API for pgtable. It can be used to walk page
table for a translation lookup, such as looking up if a page mapping is
present in a specific page table.

Signed-off-by: Jason Chen CJ <jason.cj.chen@xxxxxxxxx>
---
 arch/x86/kvm/vmx/pkvm/hyp/ept.c     |  6 +++
 arch/x86/kvm/vmx/pkvm/hyp/memory.h  |  2 +
 arch/x86/kvm/vmx/pkvm/hyp/mmu.c     |  6 +++
 arch/x86/kvm/vmx/pkvm/hyp/pgtable.c | 74 ++++++++++++++++++++++++++++-
 arch/x86/kvm/vmx/pkvm/hyp/pgtable.h |  5 ++
 5 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/pkvm/hyp/ept.c b/arch/x86/kvm/vmx/pkvm/hyp/ept.c
index 5b7b0d84b457..10d226d3ec59 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/ept.c
+++ b/arch/x86/kvm/vmx/pkvm/hyp/ept.c
@@ -102,6 +102,11 @@ static int ept_level_to_entries(int level)
 	return SPTE_ENT_PER_PAGE;
 }
 
+static u64 ept_level_page_mask(int level)
+{
+	return (~((1UL << SPTE_LEVEL_SHIFT(level)) - 1));
+}
+
 static unsigned long ept_level_to_size(int level)
 {
 	return KVM_HPAGE_SIZE(level);
@@ -119,6 +124,7 @@ struct pkvm_pgtable_ops ept_ops = {
 	.pgt_entry_to_phys = ept_entry_to_phys,
 	.pgt_entry_to_prot = ept_entry_to_prot,
 	.pgt_entry_to_index = ept_entry_to_index,
+	.pgt_level_page_mask = ept_level_page_mask,
 	.pgt_entry_is_leaf = ept_entry_is_leaf,
 	.pgt_level_entry_size = ept_level_entry_size,
 	.pgt_level_to_entries = ept_level_to_entries,
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/memory.h b/arch/x86/kvm/vmx/pkvm/hyp/memory.h
index c2eee487687a..87b53275bc74 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/memory.h
+++ b/arch/x86/kvm/vmx/pkvm/hyp/memory.h
@@ -7,6 +7,8 @@
 
 #include <asm/kvm_pkvm.h>
 
+#define INVALID_ADDR (~0UL)
+
 unsigned long pkvm_virt_to_symbol_phys(void *virt);
 #define __pkvm_pa_symbol(x) pkvm_virt_to_symbol_phys((void *)x)
 
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/mmu.c b/arch/x86/kvm/vmx/pkvm/hyp/mmu.c
index 0902f457d682..7684d16dd2c9 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/mmu.c
+++ b/arch/x86/kvm/vmx/pkvm/hyp/mmu.c
@@ -109,6 +109,11 @@ static void mmu_set_entry(void *ptep, u64 pte)
 	native_set_pte((pte_t *)ptep, native_make_pte(pte));
 }
 
+static u64 mmu_level_page_mask(int level)
+{
+	return (~((1UL << SPTE_LEVEL_SHIFT(level)) - 1));
+}
+
 struct pkvm_pgtable_ops mmu_ops = {
 	.pgt_entry_present = mmu_entry_present,
 	.pgt_entry_huge = mmu_entry_huge,
@@ -116,6 +121,7 @@ struct pkvm_pgtable_ops mmu_ops = {
 	.pgt_entry_to_phys = mmu_entry_to_phys,
 	.pgt_entry_to_prot = mmu_entry_to_prot,
 	.pgt_entry_to_index = mmu_entry_to_index,
+	.pgt_level_page_mask = mmu_level_page_mask,
 	.pgt_entry_is_leaf = mmu_entry_is_leaf,
 	.pgt_level_entry_size = mmu_level_entry_size,
 	.pgt_level_to_entries = mmu_level_to_entries,
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/pgtable.c b/arch/x86/kvm/vmx/pkvm/hyp/pgtable.c
index 29af06547ad1..d55acc84f4e1 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/pgtable.c
+++ b/arch/x86/kvm/vmx/pkvm/hyp/pgtable.c
@@ -27,6 +27,13 @@ struct pkvm_pgtable_unmap_data {
 	unsigned long phys;
 };
 
+struct pkvm_pgtable_lookup_data {
+	unsigned long vaddr;
+	unsigned long phys;
+	u64 prot;
+	int level;
+};
+
 static bool leaf_mapping_allowed(struct pkvm_pgtable_ops *pgt_ops,
 				unsigned long vaddr,
 				unsigned long vaddr_end,
@@ -273,6 +280,41 @@ static int pgtable_unmap_cb(struct pkvm_pgtable *pgt, unsigned long vaddr,
 	return 0;
 }
 
+static int pgtable_lookup_cb(struct pkvm_pgtable *pgt,
+			    unsigned long aligned_vaddr,
+			    unsigned long aligned_vaddr_end,
+			    int level,
+			    void *ptep,
+			    unsigned long flags,
+			    struct pgt_flush_data *flush_data,
+			    void *const arg)
+{
+	struct pkvm_pgtable_lookup_data *data = arg;
+	struct pkvm_pgtable_ops *pgt_ops = pgt->pgt_ops;
+	u64 pte = atomic64_read((atomic64_t *)ptep);
+
+	data->phys = INVALID_ADDR;
+	data->prot = 0;
+	data->level = level;
+
+	/*
+	 * This cb shall only be called for leaf, if now it is not a leaf
+	 * that means the pte is changed by others, we shall re-walk the pgtable
+	 */
+	if (unlikely(!pgt_ops->pgt_entry_is_leaf(&pte, level)))
+		return -EAGAIN;
+
+	if (pgt_ops->pgt_entry_present(&pte)) {
+		unsigned long offset =
+			data->vaddr & ~pgt_ops->pgt_level_page_mask(level);
+
+		data->phys = pgt_ops->pgt_entry_to_phys(&pte) + offset;
+		data->prot = pgt_ops->pgt_entry_to_prot(&pte);
+	}
+
+	return PGTABLE_WALK_DONE;
+}
+
 static int pgtable_free_cb(struct pkvm_pgtable *pgt,
 			    unsigned long vaddr,
 			    unsigned long vaddr_end,
@@ -367,7 +409,7 @@ static int _pgtable_walk(struct pgt_walk_data *data, void *ptep, int level)
 			break;
 
 		ret = pgtable_visit(data, (ptep + idx * entry_size), level);
-		if (ret < 0)
+		if (ret)
 			return ret;
 	}
 
@@ -469,6 +511,36 @@ int pkvm_pgtable_unmap(struct pkvm_pgtable *pgt, unsigned long vaddr_start,
 	return pgtable_walk(pgt, vaddr_start, size, &walker);
 }
 
+void pkvm_pgtable_lookup(struct pkvm_pgtable *pgt, unsigned long vaddr,
+		     unsigned long *pphys, u64 *pprot, int *plevel)
+{
+	struct pkvm_pgtable_lookup_data data = {
+		.vaddr = vaddr,
+	};
+	struct pkvm_pgtable_walker walker = {
+		.cb = pgtable_lookup_cb,
+		.arg = &data,
+		.flags = PKVM_PGTABLE_WALK_LEAF,
+	};
+	int ret, retry_cnt = 0;
+
+retry:
+	ret = pgtable_walk(pgt, vaddr, PAGE_SIZE, &walker);
+	/*
+	 * we give 5 times chance to re-walk pgtable if others change the
+	 * PTE during above pgtable walk.
+	 */
+	if ((ret == -EAGAIN) && (retry_cnt++ < 5))
+		goto retry;
+
+	if (pphys)
+		*pphys = data.phys;
+	if (pprot)
+		*pprot = data.prot;
+	if (plevel)
+		*plevel = data.level;
+}
+
 void pkvm_pgtable_destroy(struct pkvm_pgtable *pgt)
 {
 	unsigned long size;
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/pgtable.h b/arch/x86/kvm/vmx/pkvm/hyp/pgtable.h
index 5035b21e6aa0..00d3742b7f48 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/pgtable.h
+++ b/arch/x86/kvm/vmx/pkvm/hyp/pgtable.h
@@ -25,6 +25,7 @@ struct pkvm_pgtable_ops {
 	unsigned long (*pgt_entry_to_phys)(void *pte);
 	u64 (*pgt_entry_to_prot)(void *pte);
 	int (*pgt_entry_to_index)(unsigned long vaddr, int level);
+	u64 (*pgt_level_page_mask)(int level);
 	bool (*pgt_entry_is_leaf)(void *ptep, int level);
 	int (*pgt_level_entry_size)(int level);
 	int (*pgt_level_to_entries)(int level);
@@ -51,6 +52,8 @@ typedef int (*pgtable_visit_fn_t)(struct pkvm_pgtable *pgt, unsigned long vaddr,
 				  unsigned long flags, struct pgt_flush_data *flush_data,
 				  void *const arg);
 
+#define PGTABLE_WALK_DONE      1
+
 struct pkvm_pgtable_walker {
 	const pgtable_visit_fn_t cb;
 	void *const arg;
@@ -72,5 +75,7 @@ int pkvm_pgtable_map(struct pkvm_pgtable *pgt, unsigned long vaddr_start,
 		int pgsz_mask, u64 entry_prot);
 int pkvm_pgtable_unmap(struct pkvm_pgtable *pgt, unsigned long vaddr_start,
 		unsigned long phys_start, unsigned long size);
+void pkvm_pgtable_lookup(struct pkvm_pgtable *pgt, unsigned long vaddr,
+		unsigned long *pphys, u64 *pprot, int *plevel);
 void pkvm_pgtable_destroy(struct pkvm_pgtable *pgt);
 #endif
-- 
2.25.1




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux