[RFC PATCH part-7 01/12] pkvm: x86: Introduce pkvm_pgtable_annotate

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Shaoqin Huang <shaoqin.huang@xxxxxxxxx>

To protect guest memory, pKVM needs a mechanism to record the ownership
of each page. This supports transitions of memory ownership among
different entities: the host, the guests, and the pKVM hypervisor.

The host EPT page table is a good place to record page ownership: by
default, the host VM owns almost all memory resources, so its EPT page
table maps almost all memory. The host VM also manages these memory
resources by allocating them to different guests, so the host VM knows
the exact owner of each page.

pKVM uses the ignored bits [12,31] of invalid mappings in the host EPT
page-table entries to store the unique identifier of the page owner.
These 20 bits are chosen to avoid conflicting with the low 12 PTE
protection bits.

Introduce pkvm_pgtable_annotate() to set the ownership ID in a
page-table PTE. It reuses most of the map() logic but ends up creating
an invalid mapping that carries an 'annotation' instead. This changes
how pKVM does refcounting, as it now needs to count invalid mappings
when they are used for ownership tracking.

Signed-off-by: Shaoqin Huang <shaoqin.huang@xxxxxxxxx>
Signed-off-by: Jason Chen CJ <jason.cj.chen@xxxxxxxxx>
---
 arch/x86/kvm/vmx/pkvm/hyp/pgtable.c | 78 +++++++++++++++++++++++------
 arch/x86/kvm/vmx/pkvm/hyp/pgtable.h |  2 +
 2 files changed, 65 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/vmx/pkvm/hyp/pgtable.c b/arch/x86/kvm/vmx/pkvm/hyp/pgtable.c
index 54107d4685ed..95aef57d8ed7 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/pgtable.c
+++ b/arch/x86/kvm/vmx/pkvm/hyp/pgtable.c
@@ -20,6 +20,7 @@ struct pgt_walk_data {
 
 struct pkvm_pgtable_map_data {
 	unsigned long phys;
+	u64 annotation;
 	u64 prot;
 	int pgsz_mask;
 };
@@ -35,6 +36,21 @@ struct pkvm_pgtable_lookup_data {
 	int level;
 };
 
+static bool pkvm_phys_is_valid(u64 phys)
+{
+	return phys != INVALID_ADDR;
+}
+
+static bool pgtable_pte_is_counted(u64 pte)
+{
+	/*
+	 * Because invalid ptes are used to record page ownership, the
+	 * refcount tracks every non-zero pte, whether it is valid or
+	 * invalid.
+	 */
+	return !!pte;
+}
+
 static bool leaf_mapping_valid(struct pkvm_pgtable_ops *pgt_ops,
 			       unsigned long vaddr,
 			       unsigned long vaddr_end,
@@ -64,7 +80,7 @@ static bool leaf_mapping_allowed(struct pkvm_pgtable_ops *pgt_ops,
 {
 	unsigned long page_size = pgt_ops->pgt_level_to_size(level);
 
-	if (!IS_ALIGNED(phys, page_size))
+	if (pkvm_phys_is_valid(phys) && !IS_ALIGNED(phys, page_size))
 		return false;
 
 	return leaf_mapping_valid(pgt_ops, vaddr, vaddr_end, pgsz_mask, level);
@@ -97,7 +113,7 @@ static int pgtable_map_try_leaf(struct pkvm_pgtable *pgt, unsigned long vaddr,
 {
 	struct pkvm_pgtable_ops *pgt_ops = pgt->pgt_ops;
 	struct pkvm_mm_ops *mm_ops = pgt->mm_ops;
-	u64 new;
+	u64 old = *(u64 *)ptep, new;
 
 	if (!leaf_mapping_allowed(pgt_ops, vaddr, vaddr_end,
 				 data->phys, data->pgsz_mask, level)) {
@@ -105,20 +121,28 @@ static int pgtable_map_try_leaf(struct pkvm_pgtable *pgt, unsigned long vaddr,
 		return (level == PG_LEVEL_4K ? -EINVAL : -E2BIG);
 	}
 
-	new = data->phys | data->prot;
-	if (level != PG_LEVEL_4K)
-		pgt_ops->pgt_entry_mkhuge(&new);
+	if (pkvm_phys_is_valid(data->phys)) {
+		new = data->phys | data->prot;
+		if (level != PG_LEVEL_4K)
+			pgt_ops->pgt_entry_mkhuge(&new);
+	} else {
+		new = data->annotation;
+	}
 
-	if (pgt_ops->pgt_entry_present(ptep)) {
-		pgt_ops->pgt_set_entry(ptep, 0);
-		flush_data->flushtlb |= true;
+	if (pgtable_pte_is_counted(old)) {
+		if (pgt_ops->pgt_entry_present(ptep)) {
+			pgt_ops->pgt_set_entry(ptep, 0);
+			flush_data->flushtlb |= true;
+		}
 		mm_ops->put_page(ptep);
 	}
 
-	mm_ops->get_page(ptep);
-	pgt_ops->pgt_set_entry(ptep, new);
+	if (pgtable_pte_is_counted(new))
+		mm_ops->get_page(ptep);
 
-	data->phys += page_level_size(level);
+	pgt_ops->pgt_set_entry(ptep, new);
+	if (pkvm_phys_is_valid(data->phys))
+		data->phys += page_level_size(level);
 
 	return 0;
 }
@@ -489,12 +513,13 @@ int pkvm_pgtable_init(struct pkvm_pgtable *pgt,
 	return 0;
 }
 
-int pkvm_pgtable_map(struct pkvm_pgtable *pgt, unsigned long vaddr_start,
-		     unsigned long phys_start, unsigned long size,
-		     int pgsz_mask, u64 prot)
+static int __pkvm_pgtable_map(struct pkvm_pgtable *pgt, unsigned long vaddr_start,
+		     unsigned long phys, unsigned long size,
+		     int pgsz_mask, u64 prot, u64 annotation)
 {
 	struct pkvm_pgtable_map_data data = {
-		.phys = ALIGN_DOWN(phys_start, PAGE_SIZE),
+		.phys = phys,
+		.annotation = annotation,
 		.prot = prot,
 		.pgsz_mask = pgsz_mask ? pgt->allowed_pgsz & pgsz_mask :
 					 pgt->allowed_pgsz,
@@ -508,6 +533,14 @@ int pkvm_pgtable_map(struct pkvm_pgtable *pgt, unsigned long vaddr_start,
 	return pgtable_walk(pgt, vaddr_start, size, &walker);
 }
 
+int pkvm_pgtable_map(struct pkvm_pgtable *pgt, unsigned long vaddr_start,
+		     unsigned long phys_start, unsigned long size,
+		     int pgsz_mask, u64 prot)
+{
+	return __pkvm_pgtable_map(pgt, vaddr_start, ALIGN_DOWN(phys_start, PAGE_SIZE),
+				  size, pgsz_mask, prot, 0);
+}
+
 int pkvm_pgtable_unmap(struct pkvm_pgtable *pgt, unsigned long vaddr_start,
 		       unsigned long size)
 {
@@ -585,3 +618,18 @@ void pkvm_pgtable_destroy(struct pkvm_pgtable *pgt)
 	virt_root = pgt->mm_ops->phys_to_virt(pgt->root_pa);
 	pgt->mm_ops->put_page(virt_root);
 }
+
+/*
+ * pkvm_pgtable_annotate() - Unmap and annotate pages to track ownership.
+ * @annotation:		The value stored in the invalid pte. It must not
+ *			encode a present entry (@annotation[2:0] must be 0).
+ */
+int pkvm_pgtable_annotate(struct pkvm_pgtable *pgt, unsigned long addr,
+			  unsigned long size, u64 annotation)
+{
+	if (pgt->pgt_ops->pgt_entry_present(&annotation))
+		return -EINVAL;
+
+	return __pkvm_pgtable_map(pgt, addr, INVALID_ADDR, size,
+			1 << PG_LEVEL_4K, 0, annotation);
+}
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/pgtable.h b/arch/x86/kvm/vmx/pkvm/hyp/pgtable.h
index 61ee00ee07af..cb6645e96409 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/pgtable.h
+++ b/arch/x86/kvm/vmx/pkvm/hyp/pgtable.h
@@ -80,4 +80,6 @@ int pkvm_pgtable_unmap_safe(struct pkvm_pgtable *pgt, unsigned long vaddr_start,
 void pkvm_pgtable_lookup(struct pkvm_pgtable *pgt, unsigned long vaddr,
 		unsigned long *pphys, u64 *pprot, int *plevel);
 void pkvm_pgtable_destroy(struct pkvm_pgtable *pgt);
+int pkvm_pgtable_annotate(struct pkvm_pgtable *pgt, unsigned long addr,
+			  unsigned long size, u64 annotation);
 #endif
-- 
2.25.1




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux