From: Liran Alon <liran.alon@xxxxxxxxxx>

Create a separate mm for KVM that will be active when KVM #VMExit
handlers run, up until the point at which we architecturally need to
access host (or other VM) sensitive data.

This patch just creates kvm_mm but does not make it active yet; that
will be done by subsequent commits.

Signed-off-by: Liran Alon <liran.alon@xxxxxxxxxx>
Signed-off-by: Alexandre Chartre <alexandre.chartre@xxxxxxxxxx>
---
 arch/x86/kvm/isolation.c |   95 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/isolation.h |    8 ++++
 arch/x86/kvm/x86.c       |   10 ++++-
 3 files changed, 112 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/isolation.h

diff --git a/arch/x86/kvm/isolation.c b/arch/x86/kvm/isolation.c
index e25f663..74bc0cd 100644
--- a/arch/x86/kvm/isolation.c
+++ b/arch/x86/kvm/isolation.c
@@ -7,6 +7,21 @@
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/printk.h>
+
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+
+#include "isolation.h"
+
+struct mm_struct kvm_mm = {
+	.mm_rb			= RB_ROOT,
+	.mm_users		= ATOMIC_INIT(2),
+	.mm_count		= ATOMIC_INIT(1),
+	.mmap_sem		= __RWSEM_INITIALIZER(kvm_mm.mmap_sem),
+	.page_table_lock	= __SPIN_LOCK_UNLOCKED(kvm_mm.page_table_lock),
+	.mmlist			= LIST_HEAD_INIT(kvm_mm.mmlist),
+};
 
 /*
  * When set to true, KVM #VMExit handlers run in isolated address space
@@ -24,3 +39,83 @@
  */
 static bool __read_mostly address_space_isolation;
 module_param(address_space_isolation, bool, 0444);
+
+static int kvm_isolation_init_mm(void)
+{
+	pgd_t *kvm_pgd;
+	gfp_t gfp_mask;
+
+	gfp_mask = GFP_KERNEL | __GFP_ZERO;
+	kvm_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
+	if (!kvm_pgd)
+		return -ENOMEM;
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	/*
+	 * With PTI, we have two PGDs: one for the kernel page table, and one
+	 * for the user page table. The PGD with the kernel page table has to
+	 * map the entire kernel address space because paranoid faults will
+	 * unconditionally use it. So we define the KVM address space in the
+	 * user table space, although it will be used in the kernel.
+	 */
+
+	/* initialize the kernel page table */
+	memcpy(kvm_pgd, current->active_mm->pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+
+	/* define kvm_mm with the user page table */
+	kvm_mm.pgd = kernel_to_user_pgdp(kvm_pgd);
+#else /* CONFIG_PAGE_TABLE_ISOLATION */
+	kvm_mm.pgd = kvm_pgd;
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+	mm_init_cpumask(&kvm_mm);
+	init_new_context(NULL, &kvm_mm);
+
+	return 0;
+}
+
+static void kvm_isolation_uninit_mm(void)
+{
+	pgd_t *kvm_pgd;
+
+	BUG_ON(current->active_mm == &kvm_mm);
+
+	destroy_context(&kvm_mm);
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	/*
+	 * With PTI, the KVM address space is defined in the user
+	 * page table space, but the full PGD starts with the kernel
+	 * page table space.
+	 */
+	kvm_pgd = user_to_kernel_pgdp(kvm_mm.pgd);
+#else /* CONFIG_PAGE_TABLE_ISOLATION */
+	kvm_pgd = kvm_mm.pgd;
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+	kvm_mm.pgd = NULL;
+	free_pages((unsigned long)kvm_pgd, PGD_ALLOCATION_ORDER);
+}
+
+int kvm_isolation_init(void)
+{
+	int r;
+
+	if (!address_space_isolation)
+		return 0;
+
+	r = kvm_isolation_init_mm();
+	if (r)
+		return r;
+
+	pr_info("KVM: x86: Running with isolated address space\n");
+
+	return 0;
+}
+
+void kvm_isolation_uninit(void)
+{
+	if (!address_space_isolation)
+		return;
+
+	kvm_isolation_uninit_mm();
+	pr_info("KVM: x86: End of isolated address space\n");
+}
diff --git a/arch/x86/kvm/isolation.h b/arch/x86/kvm/isolation.h
new file mode 100644
index 0000000..cf8c7d4
--- /dev/null
+++ b/arch/x86/kvm/isolation.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_X86_KVM_ISOLATION_H
+#define ARCH_X86_KVM_ISOLATION_H
+
+extern int kvm_isolation_init(void);
+extern void kvm_isolation_uninit(void);
+
+#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b5edc8e..4b7cec2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -29,6 +29,7 @@
 #include "cpuid.h"
 #include "pmu.h"
 #include "hyperv.h"
+#include "isolation.h"
 
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
@@ -6972,10 +6973,14 @@ int kvm_arch_init(void *opaque)
 		goto out_free_x86_fpu_cache;
 	}
 
-	r = kvm_mmu_module_init();
+	r = kvm_isolation_init();
 	if (r)
 		goto out_free_percpu;
 
+	r = kvm_mmu_module_init();
+	if (r)
+		goto out_uninit_isolation;
+
 	kvm_set_mmio_spte_mask();
 
 	kvm_x86_ops = ops;
@@ -7000,6 +7005,8 @@ int kvm_arch_init(void *opaque)
 
 	return 0;
 
+out_uninit_isolation:
+	kvm_isolation_uninit();
 out_free_percpu:
 	free_percpu(shared_msrs);
 out_free_x86_fpu_cache:
@@ -7024,6 +7031,7 @@ void kvm_arch_exit(void)
 #ifdef CONFIG_X86_64
 	pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
 #endif
+	kvm_isolation_uninit();
 	kvm_x86_ops = NULL;
 	kvm_mmu_module_exit();
 	free_percpu(shared_msrs);
-- 
1.7.1
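
A note on the PTI PGD pair that kvm_isolation_init_mm() and
kvm_isolation_uninit_mm() rely on: with CONFIG_PAGE_TABLE_ISOLATION,
PGD_ALLOCATION_ORDER is 1, so __get_free_pages() returns an 8KB block
holding two 4KB PGD pages, the kernel PGD in the first page and the
user PGD in the second. kernel_to_user_pgdp() and user_to_kernel_pgdp()
(arch/x86/include/asm/pgtable.h) simply toggle bit PAGE_SHIFT (bit 12)
of the PGD pointer. That is also why the uninit path must convert
kvm_mm.pgd back to the kernel-page address before calling free_pages().
Here is a minimal userspace sketch of that pointer arithmetic, an
illustration only: it assumes the x86 values (PAGE_SHIFT == 12, order-1
allocation) and uses aligned_alloc() as a stand-in for
__get_free_pages().

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

/* Mimics arch/x86 kernel_to_user_pgdp(): set bit PAGE_SHIFT. */
static void *kernel_to_user_pgdp(void *pgdp)
{
	return (void *)((uintptr_t)pgdp | ((uintptr_t)1 << PAGE_SHIFT));
}

/* Mimics arch/x86 user_to_kernel_pgdp(): clear bit PAGE_SHIFT. */
static void *user_to_kernel_pgdp(void *pgdp)
{
	return (void *)((uintptr_t)pgdp & ~((uintptr_t)1 << PAGE_SHIFT));
}

int main(void)
{
	/*
	 * Stand-in for the order-1 PGD allocation: 8KB, 8KB-aligned,
	 * so bit 12 of the kernel PGD address is guaranteed clear.
	 */
	void *kernel_pgd = aligned_alloc(2 * PAGE_SIZE, 2 * PAGE_SIZE);
	void *user_pgd;

	if (!kernel_pgd)
		return 1;

	user_pgd = kernel_to_user_pgdp(kernel_pgd);
	printf("kernel PGD %p, user PGD %p (second 4KB page)\n",
	       kernel_pgd, user_pgd);

	/*
	 * Freeing must use the kernel-page address, which is why
	 * kvm_isolation_uninit_mm() converts kvm_mm.pgd back with
	 * user_to_kernel_pgdp() before calling free_pages().
	 */
	free(user_to_kernel_pgdp(user_pgd));
	return 0;
}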