[PATCH v2 03/17] kvm: x86: Add fast CR3 switch code path

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When using shadow paging, a CR3 switch in the guest results in a VM Exit.
In the common case, that VM exit doesn't require much processing by KVM.
However, it does acquire the MMU lock, which can start showing signs of
contention under some workloads even on a 2 VCPU VM when the guest is
using KPTI. Therefore, we add a fast path that avoids acquiring the MMU
lock in the most common cases e.g. when switching back and forth between
the kernel and user mode CR3s used by KPTI with no guest page table
changes in between.

For now, this fast path is implemented only for 64-bit guests and hosts
to avoid the handling of PDPTEs, but it can be extended later to 32-bit
guests and/or hosts as well.

Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h | 13 ++++++-
 arch/x86/kvm/mmu.c              | 66 ++++++++++++++++++++++++++++++---
 arch/x86/kvm/x86.c              |  3 +-
 3 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0ebe659f2802..0869a684f852 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -325,6 +325,14 @@ struct rsvd_bits_validate {
 	u64 bad_mt_xwr;
 };
 
+struct kvm_mmu_root_info {
+	gpa_t cr3;
+	hpa_t hpa;
+};
+
+#define KVM_MMU_ROOT_INFO_INVALID \
+	((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
+
 /*
  * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
  * and 2-level 32-bit).  The kvm_mmu structure abstracts the details of the
@@ -353,6 +361,7 @@ struct kvm_mmu {
 	u8 shadow_root_level;
 	u8 ept_ad;
 	bool direct_map;
+	struct kvm_mmu_root_info prev_root;
 
 	/*
 	 * Bitmap; bit set = permission fault
@@ -1279,7 +1288,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, bool free_prev_root);
 gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
 			   struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1298,7 +1307,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
 		       void *insn, int insn_len);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3);
 
 void kvm_enable_tdp(void);
 void kvm_disable_tdp(void);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ad2e2a00dc71..0c52b5d1010b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3396,17 +3396,22 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
 	*root_hpa = INVALID_PAGE;
 }
 
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu)
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, bool free_prev_root)
 {
 	int i;
 	LIST_HEAD(invalid_list);
 	struct kvm_mmu *mmu = &vcpu->arch.mmu;
 
-	if (!VALID_PAGE(mmu->root_hpa))
+	if (!VALID_PAGE(mmu->root_hpa) &&
+	    (!VALID_PAGE(mmu->prev_root.hpa) || !free_prev_root))
 		return;
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 
+	if (free_prev_root)
+		mmu_free_root_page(vcpu->kvm, &mmu->prev_root.hpa,
+				   &invalid_list);
+
 	if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
 	    (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
 		mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, &invalid_list);
@@ -4006,13 +4011,59 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
 	context->root_level = 0;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
 	context->root_hpa = INVALID_PAGE;
+	context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
 	context->direct_map = true;
 	context->nx = false;
 }
 
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
+static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3)
 {
-	kvm_mmu_free_roots(vcpu);
+	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+
+	/*
+	 * For now, limit the fast switch to 64-bit hosts+VMs in order to avoid
+	 * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs
+	 * later if necessary.
+	 */
+	if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
+	    mmu->root_level >= PT64_ROOT_4LEVEL) {
+		gpa_t prev_cr3 = mmu->prev_root.cr3;
+
+		if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT))
+			return false;
+
+		swap(mmu->root_hpa, mmu->prev_root.hpa);
+		mmu->prev_root.cr3 = kvm_read_cr3(vcpu);
+
+		if (new_cr3 == prev_cr3 && VALID_PAGE(mmu->root_hpa)) {
+			/*
+			 * It is possible that the cached previous root page is
+			 * obsolete because of a change in the MMU
+			 * generation number. However, that is accompanied by
+			 * KVM_REQ_MMU_RELOAD, which will free the root that we
+			 * have set here and allocate a new one.
+			 */
+
+			kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+			__clear_sp_write_flooding_count(
+				page_header(mmu->root_hpa));
+
+			vcpu->arch.cr3 = new_cr3;
+			__set_bit(VCPU_EXREG_CR3,
+				  (ulong *)&vcpu->arch.regs_avail);
+			mmu->set_cr3(vcpu, mmu->root_hpa);
+
+			return true;
+		}
+	}
+
+	return false;
+}
+
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3)
+{
+	if (!fast_cr3_switch(vcpu, new_cr3))
+		kvm_mmu_free_roots(vcpu, false);
 }
 
 static unsigned long get_cr3(struct kvm_vcpu *vcpu)
@@ -4490,6 +4541,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
 	context->update_pte = paging64_update_pte;
 	context->shadow_root_level = level;
 	context->root_hpa = INVALID_PAGE;
+	context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
 	context->direct_map = false;
 }
 
@@ -4520,6 +4572,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
 	context->update_pte = paging32_update_pte;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
 	context->root_hpa = INVALID_PAGE;
+	context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
 	context->direct_map = false;
 }
 
@@ -4543,6 +4596,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	context->update_pte = nonpaging_update_pte;
 	context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
 	context->root_hpa = INVALID_PAGE;
+	context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
 	context->direct_map = true;
 	context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
 	context->get_cr3 = get_cr3;
@@ -4625,6 +4679,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 	context->update_pte = ept_update_pte;
 	context->root_level = PT64_ROOT_4LEVEL;
 	context->root_hpa = INVALID_PAGE;
+	context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
 	context->direct_map = false;
 	context->base_role.ad_disabled = !accessed_dirty;
 	context->base_role.guest_mode = 1;
@@ -4727,7 +4782,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
 
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
-	kvm_mmu_free_roots(vcpu);
+	kvm_mmu_free_roots(vcpu, true);
 	WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unload);
@@ -5107,6 +5162,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+	vcpu->arch.mmu.prev_root = KVM_MMU_ROOT_INFO_INVALID;
 	vcpu->arch.mmu.translate_gpa = translate_gpa;
 	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 06dd4cdb2ca8..2d1041b5739b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -867,9 +867,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		   !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
 		return 1;
 
+	kvm_mmu_new_cr3(vcpu, cr3);
 	vcpu->arch.cr3 = cr3;
 	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
-	kvm_mmu_new_cr3(vcpu);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr3);
-- 
2.18.0.rc1.242.g61856ae69a-goog




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux