[PATCH 2/2] KVM: x86/mmu: Ensure TLBs are flushed when yielding during NX zapping

Fix two intertwined bugs in the NX huge page zapping that were introduced
by the incorporation of the TDP MMU.  Because there is a unified list of
NX huge pages, zapping can encounter both TDP MMU and legacy MMU pages,
and the two MMUs have different tracking for TLB flushing.  If one flavor
needs a flush, but the code for the other flavor yields, KVM will fail to
flush before yielding.
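
To make the hazard concrete, here is a minimal user-space model of the
pattern (illustrative only; aside from the kvm_flush_remote_tlbs()
stand-in, every name below is invented for the sketch and is not a KVM
API):

  #include <stdbool.h>
  #include <stdio.h>

  static bool flush_pending;   /* set when either MMU flavor zaps a SPTE */

  static void flush_remote_tlbs(void)  /* models kvm_flush_remote_tlbs() */
  {
          flush_pending = false;
          puts("TLBs flushed");
  }

  static void yield_if_needed(void)
  {
          /*
           * The fix in miniature: flush anything pending *before* the
           * lock can be dropped.  The buggy flow skipped this when the
           * "other" MMU flavor owned the pending flush.
           */
          if (flush_pending)
                  flush_remote_tlbs();
          /* the cond_resched() equivalent would go here */
  }

  int main(void)
  {
          flush_pending = true;   /* e.g. the legacy MMU zapped a page */
          yield_if_needed();      /* flushes before "yielding" */
          return 0;
  }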

First, honor the "flush needed" return from kvm_tdp_mmu_zap_gfn_range(),
which does the flush itself if and only if it yields, and otherwise
expects the caller to do the flush.  This requires feeding the result
into kvm_mmu_remote_flush_or_zap(), and so also fixes the case where the
TDP MMU needs a flush, the legacy MMU does not, and the main loop yields.
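
The resulting contract is visible in the kvm_zap_gfn_range() hunk
below: the caller passes in whether a flush is already pending, and
flushes afterwards if the return value says one is needed.  Condensed:

  if (is_tdp_mmu_enabled(kvm)) {
          flush = kvm_tdp_mmu_zap_gfn_range(kvm, gfn_start, gfn_end,
                                            false);
          if (flush)
                  kvm_flush_remote_tlbs(kvm);
  }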

Second, tell the TDP MMU a flush is pending if the list of zapped pages
from legacy MMUs is not empty, i.e. the legacy MMU needs a flush.  This
fixes the case where the TDP MMU yields, but does not itself require a
flush.
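
Pieced together from the mmu.c hunks below, the recovery loop with both
fixes applied looks roughly like this (declarations and the unchanged
bookkeeping elided):

  bool flush = false;

  for ( ; to_zap; --to_zap) {
          sp = list_first_entry(...);
          if (is_tdp_mmu_page(sp)) {
                  /* Fix #2: propagate the legacy MMU's pending flush. */
                  flush = kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, gfn_end,
                                  flush || !list_empty(&invalid_list));
          } else {
                  kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
          }

          if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
                  /* Fix #1: flush (or zap) both flavors before yielding. */
                  kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
                  cond_resched_rwlock_write(&kvm->mmu_lock);
                  flush = false;
          }
  }
  kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);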

Fixes: 29cf0f5007a2 ("kvm: x86/mmu: NX largepage recovery for TDP MMU")
Cc: stable@xxxxxxxxxxxxxxx
Cc: Ben Gardon <bgardon@xxxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
 arch/x86/kvm/mmu/mmu.c     | 15 ++++++++++-----
 arch/x86/kvm/mmu/tdp_mmu.c |  6 +++---
 arch/x86/kvm/mmu/tdp_mmu.h |  3 ++-
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c6ed633594a2..413d6259340e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5517,7 +5517,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 	}
 
 	if (is_tdp_mmu_enabled(kvm)) {
-		flush = kvm_tdp_mmu_zap_gfn_range(kvm, gfn_start, gfn_end);
+		flush = kvm_tdp_mmu_zap_gfn_range(kvm, gfn_start, gfn_end,
+						  false);
 		if (flush)
 			kvm_flush_remote_tlbs(kvm);
 	}
@@ -5939,6 +5940,8 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
 	struct kvm_mmu_page *sp;
 	unsigned int ratio;
 	LIST_HEAD(invalid_list);
+	bool flush = false;
+	gfn_t gfn_end;
 	ulong to_zap;
 
 	rcu_idx = srcu_read_lock(&kvm->srcu);
@@ -5960,19 +5963,21 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
 				      lpage_disallowed_link);
 		WARN_ON_ONCE(!sp->lpage_disallowed);
 		if (is_tdp_mmu_page(sp)) {
-			kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn,
-				sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level));
+			gfn_end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level);
+			flush = kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, gfn_end,
+							  flush || !list_empty(&invalid_list));
 		} else {
 			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
 			WARN_ON_ONCE(sp->lpage_disallowed);
 		}
 
 		if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
-			kvm_mmu_commit_zap_page(kvm, &invalid_list);
+			kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
 			cond_resched_rwlock_write(&kvm->mmu_lock);
+			flush = false;
 		}
 	}
-	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+	kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
 
 	write_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, rcu_idx);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 6cf08c3c537f..367f12bf1026 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -709,10 +709,10 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
  * SPTEs have been cleared and a TLB flush is needed before releasing the
  * MMU lock.
  */
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
+bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+			       bool flush)
 {
 	struct kvm_mmu_page *root;
-	bool flush = false;
 
 	for_each_tdp_mmu_root_yield_safe(kvm, root)
 		flush = zap_gfn_range(kvm, root, start, end, true, flush);
@@ -725,7 +725,7 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
 	gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
 	bool flush;
 
-	flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);
+	flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn, false);
 	if (flush)
 		kvm_flush_remote_tlbs(kvm);
 }
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 3b761c111bff..e39bee52d49e 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -8,7 +8,8 @@
 hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
 void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root);
 
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end);
+bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+			       bool flush);
 void kvm_tdp_mmu_zap_all(struct kvm *kvm);
 
 int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
-- 
2.31.0.rc2.261.g7f71774620-goog