[RFC PATCH part-3 13/22] pkvm: x86: Introduce MMU pgtable support

Following the native Linux setup, pKVM runs in paging mode to access
its memory, so an MMU page table needs to be set up.

Add mmu.c for pKVM, which provides APIs to set up MMU pgtable mappings.
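
For illustration, a hedged usage sketch of these APIs; va, pa,
pgsz_mask and prot are caller-chosen placeholders rather than values
defined by this series, and SZ_2M comes from <linux/sizes.h>:

    /*
     * Hypothetical helper, not part of this series: map a 2MiB region
     * into the hyp MMU, then tear the mapping down again.
     */
    static int pkvm_mmu_map_example(unsigned long va, unsigned long pa,
            int pgsz_mask, u64 prot)
    {
        int ret;

        ret = pkvm_mmu_map(va, pa, SZ_2M, pgsz_mask, prot);
        if (ret)
            return ret;

        return pkvm_mmu_unmap(va, pa, SZ_2M);
    }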

As the buddy page allocator itself depends on MMU pgtable mappings, MMU
page table initialization is divided into two stages, before and after
buddy page allocator setup:
- Before buddy page allocator setup, the MMU page table mm_ops is based
  on the early page allocator; this initialization is done by
  pkvm_early_mmu_init(). After it, MMU page tables can be created, but
  the usage of the pages allocated for them is not tracked.
- After buddy page allocator setup, the vmemmap MMU mapping is done
  through the early MMU above; the MMU page table mm_ops can then be
  switched to the buddy page allocator, and meanwhile the refcounts of
  the early-allocated pages need fixing so that the buddy allocator
  tracks them.

This patch only implements the first stage.
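
A minimal sketch of the stage-1 bring-up order described above; the
capability and the early page pool are assumed to be prepared by
earlier setup code in the series:

    /*
     * Hypothetical stage-1 bring-up: hand the reserved pool to the
     * early allocator and initialize hyp_mmu on top of it.
     */
    static int pkvm_mmu_bringup_example(struct pkvm_pgtable_cap *cap,
            void *pool_base, unsigned long pool_pages)
    {
        int ret;

        ret = pkvm_early_mmu_init(cap, pool_base, pool_pages);
        if (ret)
            return ret;

        /*
         * Stage 2 (a later patch): map vmemmap through this early MMU,
         * switch mm_ops to the buddy allocator and fix the refcounts
         * of the early-allocated pages.
         */
        return 0;
    }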

Aliases of some kernel-proper symbols are also defined for pKVM's use:
physical_mask for the physical address mask, and sme_me_mask and
__default_kernel_pte_mask for page protection setup.
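
PKVM_ALIAS itself is introduced elsewhere; by analogy with arm64's
KVM_NVHE_ALIAS it is assumed to expand to a linker-script PROVIDE()
that re-exports the kernel symbol to the pKVM image under a prefixed
name, roughly:

    /* Assumed shape only, mirroring arm64's KVM_NVHE_ALIAS; the real
     * macro and symbol prefix are defined elsewhere in this series. */
    #define PKVM_ALIAS(sym)    PROVIDE(__pkvm_##sym = sym);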

Signed-off-by: Shaoqin Huang <shaoqin.huang@xxxxxxxxx>
Signed-off-by: Chuanxiao Dong <chuanxiao.dong@xxxxxxxxx>
Signed-off-by: Jason Chen CJ <jason.cj.chen@xxxxxxxxx>
---
 arch/x86/include/asm/pkvm_image_vars.h |  18 ++++
 arch/x86/kernel/vmlinux.lds.S          |   4 +
 arch/x86/kvm/vmx/pkvm/hyp/Makefile     |   2 +-
 arch/x86/kvm/vmx/pkvm/hyp/mmu.c        | 122 +++++++++++++++++++++++++
 arch/x86/kvm/vmx/pkvm/hyp/mmu.h        |  17 ++++
 5 files changed, 162 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/pkvm_image_vars.h b/arch/x86/include/asm/pkvm_image_vars.h
new file mode 100644
index 000000000000..a7823dc9b981
--- /dev/null
+++ b/arch/x86/include/asm/pkvm_image_vars.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Intel Corporation
+ */
+#ifndef __ASM_x86_PKVM_IMAGE_VARS_H
+#define __ASM_x86_PKVM_IMAGE_VARS_H
+
+#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
+PKVM_ALIAS(physical_mask);
+#endif
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+PKVM_ALIAS(sme_me_mask);
+#endif
+
+PKVM_ALIAS(__default_kernel_pte_mask);
+
+#endif /* __ASM_x86_PKVM_IMAGE_VARS_H */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0199d81147db..9f931d39c643 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -541,4 +541,8 @@ INIT_PER_CPU(irq_stack_backing_store);
            "fixed_percpu_data is not at start of per-cpu area");
 #endif
 
+#ifdef CONFIG_PKVM_INTEL
+#include <asm/pkvm_image_vars.h>
+#endif
+
 #endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/Makefile b/arch/x86/kvm/vmx/pkvm/hyp/Makefile
index 39a51230ad3a..cc869624b201 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/Makefile
+++ b/arch/x86/kvm/vmx/pkvm/hyp/Makefile
@@ -12,7 +12,7 @@ ccflags-y += -D__PKVM_HYP__
 lib-dir		:= lib
 virt-dir	:= ../../../../../../$(KVM_PKVM)
 
-pkvm-hyp-y	:= vmx_asm.o vmexit.o memory.o early_alloc.o pgtable.o
+pkvm-hyp-y	:= vmx_asm.o vmexit.o memory.o early_alloc.o pgtable.o mmu.o
 
 pkvm-hyp-y	+= $(lib-dir)/memset_64.o
 pkvm-hyp-$(CONFIG_RETPOLINE)	+= $(lib-dir)/retpoline.o
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/mmu.c b/arch/x86/kvm/vmx/pkvm/hyp/mmu.c
new file mode 100644
index 000000000000..8b07355c5b96
--- /dev/null
+++ b/arch/x86/kvm/vmx/pkvm/hyp/mmu.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Intel Corporation
+ */
+
+#include <linux/memblock.h>
+#include <asm/kvm_pkvm.h>
+#include <asm/pkvm_spinlock.h>
+#include <mmu.h>
+#include <mmu/spte.h>
+
+#include <pkvm.h>
+#include "early_alloc.h"
+#include "pgtable.h"
+#include "mmu.h"
+#include "debug.h"
+
+static struct pkvm_pgtable hyp_mmu;
+
+static bool mmu_entry_present(void *ptep)
+{
+	return pte_present(*(pte_t *)ptep);
+}
+
+static bool mmu_entry_huge(void *ptep)
+{
+	return pte_huge(*(pte_t *)ptep);
+}
+
+static void mmu_entry_mkhuge(void *ptep)
+{
+	pte_t *ptep_ptr = (pte_t *)ptep;
+
+	*ptep_ptr = pte_mkhuge(*ptep_ptr);
+}
+
+static unsigned long mmu_entry_to_phys(void *ptep)
+{
+	return native_pte_val(*(pte_t *)ptep) & PTE_PFN_MASK;
+}
+
+static u64 mmu_entry_to_prot(void *ptep)
+{
+	return (u64)pte_flags(pte_clear_flags(*(pte_t *)ptep, _PAGE_PSE));
+}
+
+static int mmu_entry_to_index(unsigned long vaddr, int level)
+{
+	return SPTE_INDEX(vaddr, level);
+}
+
+static bool mmu_entry_is_leaf(void *ptep, int level)
+{
+	if (level == PG_LEVEL_4K ||
+		!mmu_entry_present(ptep) ||
+		mmu_entry_huge(ptep))
+		return true;
+
+	return false;
+}
+
+static int mmu_level_entry_size(int level)
+{
+	return PAGE_SIZE / PTRS_PER_PTE;
+}
+
+static int mmu_level_to_entries(int level)
+{
+	return PTRS_PER_PTE;
+}
+
+static unsigned long mmu_level_to_size(int level)
+{
+	return page_level_size(level);
+}
+
+static void mmu_set_entry(void *ptep, u64 pte)
+{
+	native_set_pte((pte_t *)ptep, native_make_pte(pte));
+}
+
+struct pkvm_pgtable_ops mmu_ops = {
+	.pgt_entry_present = mmu_entry_present,
+	.pgt_entry_huge = mmu_entry_huge,
+	.pgt_entry_mkhuge = mmu_entry_mkhuge,
+	.pgt_entry_to_phys = mmu_entry_to_phys,
+	.pgt_entry_to_prot = mmu_entry_to_prot,
+	.pgt_entry_to_index = mmu_entry_to_index,
+	.pgt_entry_is_leaf = mmu_entry_is_leaf,
+	.pgt_level_entry_size = mmu_level_entry_size,
+	.pgt_level_to_entries = mmu_level_to_entries,
+	.pgt_level_to_size = mmu_level_to_size,
+	.pgt_set_entry = mmu_set_entry,
+};
+
+int pkvm_mmu_map(unsigned long vaddr_start, unsigned long phys_start,
+		unsigned long size, int pgsz_mask, u64 prot)
+{
+	int ret;
+
+	ret = pkvm_pgtable_map(&hyp_mmu, vaddr_start, phys_start, size, pgsz_mask, prot);
+
+	return ret;
+}
+
+int pkvm_mmu_unmap(unsigned long vaddr_start, unsigned long phys_start, unsigned long size)
+{
+	int ret;
+
+	ret = pkvm_pgtable_unmap(&hyp_mmu, vaddr_start, phys_start, size);
+
+	return ret;
+}
+
+/* early mmu init before vmemmap ready, use early allocator first */
+int pkvm_early_mmu_init(struct pkvm_pgtable_cap *cap,
+		void *mmu_pool_base, unsigned long mmu_pool_pages)
+{
+	pkvm_early_alloc_init(mmu_pool_base, mmu_pool_pages << PAGE_SHIFT);
+
+	return pkvm_pgtable_init(&hyp_mmu, &pkvm_early_alloc_mm_ops, &mmu_ops, cap, true);
+}
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/mmu.h b/arch/x86/kvm/vmx/pkvm/hyp/mmu.h
new file mode 100644
index 000000000000..1cb5eef6aa2e
--- /dev/null
+++ b/arch/x86/kvm/vmx/pkvm/hyp/mmu.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Intel Corporation
+ */
+#ifndef _PKVM_MMU_H_
+#define _PKVM_MMU_H_
+
+int pkvm_mmu_map(unsigned long vaddr_start, unsigned long phys_start,
+		unsigned long size, int pgsz_mask, u64 prot);
+
+int pkvm_mmu_unmap(unsigned long vaddr_start, unsigned long phys_start,
+		unsigned long size);
+
+int pkvm_early_mmu_init(struct pkvm_pgtable_cap *cap,
+		void *mmu_pool_base, unsigned long mmu_pool_pages);
+
+#endif /* _PKVM_MMU_H_ */
-- 
2.25.1