[RFC PATCH 08/28] kvm: mmu: Init / Uninit the direct MMU

The direct MMU introduces several new fields that need to be initialized
and torn down. Add functions to do that initialization / cleanup.

Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
---
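One note on the new init / teardown paths below: the direct MMU root
pages are tracked only by physical address (hpa_t), so before such a page
can be handed back to the allocator the address has to be converted back
to a kernel virtual address. A minimal sketch of the allocate / free
pairing, assuming the page came from alloc_page() / page_to_phys() and
using KVM's existing INVALID_PAGE / VALID_PAGE markers; alloc_root_hpa()
and free_root_hpa() are illustrative helpers, not functions added by this
patch:

	/* Allocate a zeroed root page and return its physical address. */
	static hpa_t alloc_root_hpa(void)
	{
		struct page *page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);

		if (!page)
			return INVALID_PAGE;
		return page_to_phys(page);
	}

	/*
	 * Free a root page tracked by physical address; __va() recovers the
	 * direct-map virtual address that free_page() expects.
	 */
	static void free_root_hpa(hpa_t hpa)
	{
		if (VALID_PAGE(hpa))
			free_page((unsigned long)__va(hpa));
	}
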
 arch/x86/include/asm/kvm_host.h |  51 ++++++++----
 arch/x86/kvm/mmu.c              | 132 +++++++++++++++++++++++++++++---
 arch/x86/kvm/x86.c              |  16 +++-
 3 files changed, 169 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 23edf56cf577c..1f8164c577d50 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -236,6 +236,22 @@ enum {
  */
 #define KVM_APIC_PV_EOI_PENDING	1
 
+#define HF_GIF_MASK		(1 << 0)
+#define HF_HIF_MASK		(1 << 1)
+#define HF_VINTR_MASK		(1 << 2)
+#define HF_NMI_MASK		(1 << 3)
+#define HF_IRET_MASK		(1 << 4)
+#define HF_GUEST_MASK		(1 << 5) /* VCPU is in guest-mode */
+#define HF_SMM_MASK		(1 << 6)
+#define HF_SMM_INSIDE_NMI_MASK	(1 << 7)
+
+#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+#define KVM_ADDRESS_SPACE_NUM 2
+
+#define kvm_arch_vcpu_memslots_id(vcpu) \
+		((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
+#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+
 struct kvm_kernel_irq_routing_entry;
 
 /*
@@ -940,6 +956,24 @@ struct kvm_arch {
 	bool exception_payload_enabled;
 
 	struct kvm_pmu_event_filter *pmu_event_filter;
+
+	/*
+	 * Whether the direct MMU is enabled for this VM. This contains a
+	 * snapshot of the direct MMU module parameter from when the VM was
+	 * created and remains unchanged for the life of the VM. If this is
+	 * true, direct MMU handler functions will run for various MMU
+	 * operations.
+	 */
+	bool direct_mmu_enabled;
+	/*
+	 * Indicates that the paging structure built by the direct MMU is
+	 * currently the only one in use. If nesting is used, prompting the
+	 * creation of shadow page tables for L2, this will be set to false.
+	 * While this is true, only direct MMU handlers will be run for many
+	 * MMU functions. Ignored if !direct_mmu_enabled.
+	 */
+	bool pure_direct_mmu;
+	hpa_t direct_root_hpa[KVM_ADDRESS_SPACE_NUM];
 };
 
 struct kvm_vm_stat {
@@ -1255,7 +1289,7 @@ void kvm_mmu_module_exit(void);
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
-void kvm_mmu_init_vm(struct kvm *kvm);
+int kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
@@ -1519,21 +1553,6 @@ enum {
 	TASK_SWITCH_GATE = 3,
 };
 
-#define HF_GIF_MASK		(1 << 0)
-#define HF_HIF_MASK		(1 << 1)
-#define HF_VINTR_MASK		(1 << 2)
-#define HF_NMI_MASK		(1 << 3)
-#define HF_IRET_MASK		(1 << 4)
-#define HF_GUEST_MASK		(1 << 5) /* VCPU is in guest-mode */
-#define HF_SMM_MASK		(1 << 6)
-#define HF_SMM_INSIDE_NMI_MASK	(1 << 7)
-
-#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
-#define KVM_ADDRESS_SPACE_NUM 2
-
-#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
-#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
-
 asmlinkage void kvm_spurious_fault(void);
 
 /*
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 50413f17c7cd0..788edbda02f69 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -47,6 +47,10 @@
 #include <asm/kvm_page_track.h>
 #include "trace.h"
 
+static bool __read_mostly direct_mmu_enabled;
+module_param_named(enable_direct_mmu, direct_mmu_enabled, bool,
+		   S_IRUGO | S_IWUSR);
+
 /*
  * When setting this variable to true it enables Two-Dimensional-Paging
  * where the hardware walks 2 page tables:
@@ -3754,27 +3758,56 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
 	*root_hpa = INVALID_PAGE;
 }
 
+static bool is_direct_mmu_root(struct kvm *kvm, hpa_t root)
+{
+	int as_id;
+
+	for (as_id = 0; as_id < KVM_ADDRESS_SPACE_NUM; as_id++)
+		if (root == kvm->arch.direct_root_hpa[as_id])
+			return true;
+
+	return false;
+}
+
 /* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */
 void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			ulong roots_to_free)
 {
 	int i;
 	LIST_HEAD(invalid_list);
-	bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;
 
 	BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
 
-	/* Before acquiring the MMU lock, see if we need to do any real work. */
-	if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) {
-		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-			if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) &&
-			    VALID_PAGE(mmu->prev_roots[i].hpa))
-				break;
+	/*
+	 * Direct MMU paging structures follow the life of the VM, so instead
+	 * of destroying the direct MMU root page, simply mark the root HPA
+	 * that points to it as invalid.
+	 */
+	if (vcpu->kvm->arch.direct_mmu_enabled &&
+	    roots_to_free & KVM_MMU_ROOT_CURRENT &&
+	    is_direct_mmu_root(vcpu->kvm, mmu->root_hpa))
+		mmu->root_hpa = INVALID_PAGE;
 
-		if (i == KVM_MMU_NUM_PREV_ROOTS)
-			return;
+	if (!VALID_PAGE(mmu->root_hpa))
+		roots_to_free &= ~KVM_MMU_ROOT_CURRENT;
+
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+		if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) {
+			if (is_direct_mmu_root(vcpu->kvm,
+					       mmu->prev_roots[i].hpa))
+				mmu->prev_roots[i].hpa = INVALID_PAGE;
+			if (!VALID_PAGE(mmu->prev_roots[i].hpa))
+				roots_to_free &= ~KVM_MMU_ROOT_PREVIOUS(i);
+		}
 	}
 
+	/*
+	 * If there are no valid roots that need freeing at this point, avoid
+	 * acquiring the MMU lock and return.
+	 */
+	if (!roots_to_free)
+		return;
+
 	write_lock(&vcpu->kvm->mmu_lock);
 
 	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
@@ -3782,7 +3815,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			mmu_free_root_page(vcpu->kvm, &mmu->prev_roots[i].hpa,
 					   &invalid_list);
 
-	if (free_active_root) {
+	if (roots_to_free & KVM_MMU_ROOT_CURRENT) {
 		if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
 		    (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
 			mmu_free_root_page(vcpu->kvm, &mmu->root_hpa,
@@ -3820,7 +3853,12 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 	struct kvm_mmu_page *sp;
 	unsigned i;
 
-	if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
+	if (vcpu->kvm->arch.direct_mmu_enabled) {
+		/* TODO: Support 5 level paging in the direct MMU */
+		BUG_ON(vcpu->arch.mmu->shadow_root_level > PT64_ROOT_4LEVEL);
+		vcpu->arch.mmu->root_hpa = vcpu->kvm->arch.direct_root_hpa[
+			kvm_arch_vcpu_memslots_id(vcpu)];
+	} else if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
 		write_lock(&vcpu->kvm->mmu_lock);
 		if(make_mmu_pages_available(vcpu) < 0) {
 			write_unlock(&vcpu->kvm->mmu_lock);
@@ -3863,6 +3901,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	gfn_t root_gfn, root_cr3;
 	int i;
 
+	write_lock(&vcpu->kvm->mmu_lock);
+	vcpu->kvm->arch.pure_direct_mmu = false;
+	write_unlock(&vcpu->kvm->mmu_lock);
+
 	root_cr3 = vcpu->arch.mmu->get_cr3(vcpu);
 	root_gfn = root_cr3 >> PAGE_SHIFT;
 
@@ -5710,6 +5752,64 @@ void kvm_disable_tdp(void)
 }
 EXPORT_SYMBOL_GPL(kvm_disable_tdp);
 
+static bool is_direct_mmu_enabled(void)
+{
+	if (!READ_ONCE(direct_mmu_enabled))
+		return false;
+
+	if (WARN_ONCE(!tdp_enabled,
+		      "Creating a VM with direct MMU enabled requires TDP."))
+		return false;
+
+	return true;
+}
+
+static int kvm_mmu_init_direct_mmu(struct kvm *kvm)
+{
+	struct page *page;
+	int i;
+
+	if (!is_direct_mmu_enabled())
+		return 0;
+
+	/*
+	 * Allocate the direct MMU root pages. These pages follow the life of
+	 * the VM.
+	 */
+	for (i = 0; i < ARRAY_SIZE(kvm->arch.direct_root_hpa); i++) {
+		page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+		if (!page)
+			goto err;
+		kvm->arch.direct_root_hpa[i] = page_to_phys(page);
+	}
+
+	/* This should not be changed for the lifetime of the VM. */
+	kvm->arch.direct_mmu_enabled = true;
+
+	kvm->arch.pure_direct_mmu = true;
+	return 0;
+err:
+	for (i = 0; i < ARRAY_SIZE(kvm->arch.direct_root_hpa); i++) {
+		if (kvm->arch.direct_root_hpa[i] &&
+		    VALID_PAGE(kvm->arch.direct_root_hpa[i]))
+			free_page((unsigned long)__va(kvm->arch.direct_root_hpa[i]));
+		kvm->arch.direct_root_hpa[i] = INVALID_PAGE;
+	}
+	return -ENOMEM;
+}
+
+static void kvm_mmu_uninit_direct_mmu(struct kvm *kvm)
+{
+	int i;
+
+	if (!kvm->arch.direct_mmu_enabled)
+		return;
+
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+		handle_disconnected_pt(kvm, i, 0,
+			(kvm_pfn_t)(kvm->arch.direct_root_hpa[i] >> PAGE_SHIFT),
+			PT64_ROOT_4LEVEL);
+}
 
 /* The return value indicates if tlb flush on all vcpus is needed. */
 typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
@@ -5956,13 +6056,19 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
 	kvm_mmu_zap_all_fast(kvm);
 }
 
-void kvm_mmu_init_vm(struct kvm *kvm)
+int kvm_mmu_init_vm(struct kvm *kvm)
 {
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
+	int r;
+
+	r = kvm_mmu_init_direct_mmu(kvm);
+	if (r)
+		return r;
 
 	node->track_write = kvm_mmu_pte_write;
 	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
 	kvm_page_track_register_notifier(kvm, node);
+	return 0;
 }
 
 void kvm_mmu_uninit_vm(struct kvm *kvm)
@@ -5970,6 +6076,8 @@ void kvm_mmu_uninit_vm(struct kvm *kvm)
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
 
 	kvm_page_track_unregister_notifier(kvm, node);
+
+	kvm_mmu_uninit_direct_mmu(kvm);
 }
 
 void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9ecf83da396c9..2972b6c6029fb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9421,6 +9421,8 @@ void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
+	int err;
+
 	if (type)
 		return -EINVAL;
 
@@ -9450,9 +9452,19 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	kvm_hv_init_vm(kvm);
 	kvm_page_track_init(kvm);
-	kvm_mmu_init_vm(kvm);
+	err = kvm_mmu_init_vm(kvm);
+	if (err)
+		return err;
+
+	err = kvm_x86_ops->vm_init(kvm);
+	if (err)
+		goto error;
+
+	return 0;
 
-	return kvm_x86_ops->vm_init(kvm);
+error:
+	kvm_mmu_uninit_vm(kvm);
+	return err;
 }
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
-- 
2.23.0.444.g18eeb5a265-goog