[PATCH v3 17/18] kvm: x86: Add multi-entry LRU cache for previous CR3s

Add support for storing multiple previous CR3/root_hpa pairs, maintained
as an LRU cache, so that the lockless CR3 switch path can be used when
switching back to any of them.

Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx>
---
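Note (not part of the commit, just a reading aid): the standalone userspace
sketch below illustrates the rotation that the new cached_root_available()
helper in this patch performs. The outgoing root is inserted at the head of
the prev_roots cache, older entries shift down one slot via swap, and either
a matching cached root or the evicted LRU entry pops out to become the new
current root (which the caller frees on a miss). All names here
(NUM_PREV_ROOTS, struct root_info, demo_cached_root_available) are local to
the illustration, and the page-role check of the real code is omitted.

/*
 * Illustration only: mirrors the swap-based MRU insertion of
 * cached_root_available(), outside of KVM.
 */
#include <stdbool.h>
#include <stdio.h>

#define NUM_PREV_ROOTS 3

struct root_info {
	unsigned long cr3;
	unsigned long hpa;
};

static struct root_info prev_roots[NUM_PREV_ROOTS];
static struct root_info current_root;

static void swap_roots(struct root_info *a, struct root_info *b)
{
	struct root_info tmp = *a;

	*a = *b;
	*b = tmp;
}

/* Returns true if new_cr3 was found in the cache and promoted to current. */
static bool demo_cached_root_available(unsigned long new_cr3)
{
	struct root_info root = current_root;
	unsigned int i;

	for (i = 0; i < NUM_PREV_ROOTS; i++) {
		/* Push the newer root in, pull the older one out. */
		swap_roots(&root, &prev_roots[i]);
		if (root.cr3 == new_cr3 && root.hpa)	/* role check omitted */
			break;
	}

	/* Either the matching root, or the evicted LRU root (to be freed). */
	current_root = root;
	return i < NUM_PREV_ROOTS;
}

int main(void)
{
	current_root = (struct root_info){ .cr3 = 0x1000, .hpa = 0xa000 };
	prev_roots[0] = (struct root_info){ .cr3 = 0x2000, .hpa = 0xb000 };

	/* Switching back to a cached CR3 reuses its root page (cache hit). */
	printf("hit=%d new current hpa=%#lx\n",
	       (int)demo_cached_root_available(0x2000), current_root.hpa);

	/* The old current root (cr3 0x1000) is now at the head of the cache. */
	printf("prev_roots[0].cr3=%#lx\n", prev_roots[0].cr3);
	return 0;
}

Running this prints a cache hit with the cached root's hpa promoted to
current, while the previously current root ends up in prev_roots[0], which is
the behaviour the fast_cr3_switch() path below relies on.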
 arch/x86/include/asm/kvm_host.h |  10 +--
 arch/x86/kvm/mmu.c              | 111 ++++++++++++++++++++++----------
 arch/x86/kvm/vmx.c              |  12 ++--
 3 files changed, 92 insertions(+), 41 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f50de601afd8..f854183e3ecd 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -334,6 +334,8 @@ struct kvm_mmu_root_info {
 #define KVM_MMU_ROOT_INFO_INVALID \
 	((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
 
+#define KVM_MMU_NUM_PREV_ROOTS 3
+
 /*
  * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
  * and 2-level 32-bit).  The kvm_mmu structure abstracts the details of the
@@ -362,7 +364,7 @@ struct kvm_mmu {
 	u8 shadow_root_level;
 	u8 ept_ad;
 	bool direct_map;
-	struct kvm_mmu_root_info prev_root;
+	struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];
 
 	/*
 	 * Bitmap; bit set = permission fault
@@ -1286,9 +1288,9 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state,
 	return !!(*irq_state);
 }
 
-#define KVM_MMU_ROOT_CURRENT	BIT(0)
-#define KVM_MMU_ROOT_PREVIOUS	BIT(1)
-#define KVM_MMU_ROOTS_ALL	(~0UL)
+#define KVM_MMU_ROOT_CURRENT		BIT(0)
+#define KVM_MMU_ROOT_PREVIOUS(i)	BIT(1+i)
+#define KVM_MMU_ROOTS_ALL		(~0UL)
 
 int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
 void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9ca4984a8b87..00cea622f7b0 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3445,18 +3445,26 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free)
 	LIST_HEAD(invalid_list);
 	struct kvm_mmu *mmu = &vcpu->arch.mmu;
 	bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;
-	bool free_prev_root = roots_to_free & KVM_MMU_ROOT_PREVIOUS;
+
+	BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
 
 	/* Before acquiring the MMU lock, see if we need to do any real work. */
-	if (!(free_active_root && VALID_PAGE(mmu->root_hpa)) &&
-	    !(free_prev_root && VALID_PAGE(mmu->prev_root.hpa)))
-		return;
+	if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) {
+		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+			if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) &&
+			    VALID_PAGE(mmu->prev_roots[i].hpa))
+				break;
+
+		if (i == KVM_MMU_NUM_PREV_ROOTS)
+			return;
+	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 
-	if (free_prev_root)
-		mmu_free_root_page(vcpu->kvm, &mmu->prev_root.hpa,
-				   &invalid_list);
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+		if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i))
+			mmu_free_root_page(vcpu->kvm, &mmu->prev_roots[i].hpa,
+					   &invalid_list);
 
 	if (free_active_root) {
 		if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
@@ -4064,6 +4072,38 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
 	context->nx = false;
 }
 
+/*
+ * Find out if a previously cached root matching the new CR3/role is available.
+ * The current root is also inserted into the cache.
+ * If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is
+ * returned.
+ * Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and
+ * false is returned. This root should now be freed by the caller.
+ */
+static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
+				  union kvm_mmu_page_role new_role)
+{
+	uint i;
+	struct kvm_mmu_root_info root;
+	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+
+	root.cr3 = mmu->get_cr3(vcpu);
+	root.hpa = mmu->root_hpa;
+
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+		swap(root, mmu->prev_roots[i]);
+
+		if (new_cr3 == root.cr3 && VALID_PAGE(root.hpa) &&
+		    page_header(root.hpa) != NULL &&
+		    new_role.word == page_header(root.hpa)->role.word)
+			break;
+	}
+
+	mmu->root_hpa = root.hpa;
+
+	return i < KVM_MMU_NUM_PREV_ROOTS;
+}
+
 static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
 			    union kvm_mmu_page_role new_role,
 			    bool skip_tlb_flush)
@@ -4077,18 +4117,10 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
 	 */
 	if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
 	    mmu->root_level >= PT64_ROOT_4LEVEL) {
-		gpa_t prev_cr3 = mmu->prev_root.cr3;
-
 		if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT))
 			return false;
 
-		swap(mmu->root_hpa, mmu->prev_root.hpa);
-		mmu->prev_root.cr3 = mmu->get_cr3(vcpu);
-
-		if (new_cr3 == prev_cr3 &&
-		    VALID_PAGE(mmu->root_hpa) &&
-		    page_header(mmu->root_hpa) != NULL &&
-		    new_role.word == page_header(mmu->root_hpa)->role.word) {
+		if (cached_root_available(vcpu, new_cr3, new_role)) {
 			/*
 			 * It is possible that the cached previous root page is
 			 * obsolete because of a change in the MMU
@@ -4854,8 +4886,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots)
 {
 	if (reset_roots) {
+		uint i;
+
 		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-		vcpu->arch.mmu.prev_root = KVM_MMU_ROOT_INFO_INVALID;
+
+		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+			vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
 	}
 
 	if (mmu_is_nested(vcpu))
@@ -5225,22 +5261,24 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+	uint i;
 
 	mmu->invlpg(vcpu, gva, mmu->root_hpa);
 
 	/*
 	 * INVLPG is required to invalidate any global mappings for the VA,
 	 * irrespective of PCID. Since it would take us roughly similar amount
-	 * of work to determine whether the prev_root mapping of the VA is
-	 * marked global, or to just sync it blindly, so we might as well just
-	 * always sync it.
+	 * of work to determine whether any of the prev_root mappings of the VA
+	 * is marked global, or to just sync it blindly, so we might as well
+	 * just always sync it.
 	 *
-	 * Mappings not reachable via the current cr3 or the prev_root.cr3 will
-	 * be synced when switching to that cr3, so nothing needs to be done
-	 * here for them.
+	 * Mappings not reachable via the current cr3 or the prev_roots will be
+	 * synced when switching to that cr3, so nothing needs to be done here
+	 * for them.
 	 */
-	if (VALID_PAGE(mmu->prev_root.hpa))
-		mmu->invlpg(vcpu, gva, mmu->prev_root.hpa);
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+		if (VALID_PAGE(mmu->prev_roots[i].hpa))
+			mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
 
 	kvm_x86_ops->tlb_flush_gva(vcpu, gva);
 	++vcpu->stat.invlpg;
@@ -5251,16 +5289,19 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
 {
 	struct kvm_mmu *mmu = &vcpu->arch.mmu;
 	bool tlb_flush = false;
+	uint i;
 
 	if (pcid == kvm_get_active_pcid(vcpu)) {
 		mmu->invlpg(vcpu, gva, mmu->root_hpa);
 		tlb_flush = true;
 	}
 
-	if (VALID_PAGE(mmu->prev_root.hpa) &&
-	    pcid == kvm_get_pcid(vcpu, mmu->prev_root.cr3)) {
-		mmu->invlpg(vcpu, gva, mmu->prev_root.hpa);
-		tlb_flush = true;
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+		if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
+		    pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].cr3)) {
+			mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+			tlb_flush = true;
+		}
 	}
 
 	if (tlb_flush)
@@ -5269,9 +5310,9 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
 	++vcpu->stat.invlpg;
 
 	/*
-	 * Mappings not reachable via the current cr3 or the prev_root.cr3 will
-	 * be synced when switching to that cr3, so nothing needs to be done
-	 * here for them.
+	 * Mappings not reachable via the current cr3 or the prev_roots will be
+	 * synced when switching to that cr3, so nothing needs to be done here
+	 * for them.
 	 */
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
@@ -5317,12 +5358,16 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
+	uint i;
+
 	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-	vcpu->arch.mmu.prev_root = KVM_MMU_ROOT_INFO_INVALID;
 	vcpu->arch.mmu.translate_gpa = translate_gpa;
 	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
 
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+		vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
+
 	return alloc_mmu_pages(vcpu);
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4d0b13a84c30..f802102f6dc5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8653,6 +8653,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
 	bool pcid_enabled;
 	gva_t gva;
 	struct x86_exception e;
+	uint i;
+	ulong roots_to_free = 0;
 	struct {
 		u64 pcid;
 		u64 gla;
@@ -8711,12 +8713,14 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
 			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 		}
 
-		if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_root.cr3)
-		    == operand.pcid)
-			kvm_mmu_free_roots(vcpu, KVM_MMU_ROOT_PREVIOUS);
+		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+			if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_roots[i].cr3)
+			    == operand.pcid)
+				roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
 
+		kvm_mmu_free_roots(vcpu, roots_to_free);
 		/*
-		 * If neither the current cr3 nor the prev_root.cr3 use the
+		 * If neither the current cr3 nor any of the prev_roots use the
 		 * given PCID, then nothing needs to be done here because a
 		 * resync will happen anyway before switching to any other CR3.
 		 */
-- 
2.18.0.rc2.346.g013aa6912e-goog



