[RFC PATCH 15/28] kvm: mmu: Support invalidate_zap_all_pages

Add a function, zap_direct_gfn_range(), which uses the paging structure
iterator to zap a range of GFNs in an address space, and use it to
implement invalidate_zap_all_pages for the direct MMU.
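
The new function only modifies an entry if it is a leaf, or if it is a
non-leaf entry that lies entirely within [start, end) and sits at PDPE
level or below; larger non-leaf entries are skipped so that freeing
their child page tables cannot stall the scheduler. Rewritten in
positive form (an illustrative helper, not code from this patch), the
zap condition reads:

	/* Illustrative restatement of the zap condition; not in the patch. */
	static bool should_zap(struct direct_walk_iterator *iter,
			       gfn_t start, gfn_t end)
	{
		/* Leaf PTEs never have child PTs to free; always zap. */
		if (is_last_spte(iter->old_pte, iter->level))
			return true;
		/*
		 * Only zap a non-leaf PTE if it lies fully inside the
		 * range and at PDPE level or lower, so the number of
		 * child page tables freed under the lock stays bounded.
		 */
		return iter->pte_gfn_start >= start &&
		       iter->pte_gfn_end <= end &&
		       iter->level <= PT_PDPE_LEVEL;
	}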

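A note on usage: both call sites in this patch zap the full GFN range
and flush remote TLBs unconditionally. A caller zapping a narrower
range could instead key the flush off the function's return value
(a hypothetical sketch, assuming direct_walk_iterator_end_traversal()
reports true when any PTE was changed):

	/* Hypothetical caller: flush only if the walk changed any PTEs. */
	if (zap_direct_gfn_range(kvm, as_id, start, end, MMU_WRITE_LOCK))
		kvm_flush_remote_tlbs(kvm);
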
Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
---
 arch/x86/kvm/mmu.c | 69 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 234db5f4246a4..f0696658b527c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2120,7 +2120,6 @@ static void direct_walk_iterator_reset_traversal(
  * range, so the last gfn to be iterated over would be the largest possible
  * GFN, in this scenario.)
  */
-__attribute__((unused))
 static void direct_walk_iterator_setup_walk(struct direct_walk_iterator *iter,
 	struct kvm *kvm, int as_id, gfn_t start, gfn_t end,
 	enum mmu_lock_mode lock_mode)
@@ -2151,7 +2150,6 @@ static void direct_walk_iterator_setup_walk(struct direct_walk_iterator *iter,
 	direct_walk_iterator_start_traversal(iter);
 }
 
-__attribute__((unused))
 static void direct_walk_iterator_retry_pte(struct direct_walk_iterator *iter)
 {
 	BUG_ON(!iter->walk_in_progress);
@@ -2397,7 +2395,6 @@ static bool cmpxchg_pte(u64 *ptep, u64 old_pte, u64 new_pte, int level, u64 gfn)
 	return r == old_pte;
 }
 
-__attribute__((unused))
 static bool direct_walk_iterator_set_pte(struct direct_walk_iterator *iter,
 					 u64 new_pte)
 {
@@ -2725,6 +2722,44 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 	return ret;
 }
 
+/*
+ * Marks the range of gfns, [start, end), non-present.
+ */
+static bool zap_direct_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
+				gfn_t end, enum mmu_lock_mode lock_mode)
+{
+	struct direct_walk_iterator iter;
+
+	direct_walk_iterator_setup_walk(&iter, kvm, as_id, start, end,
+					lock_mode);
+	while (direct_walk_iterator_next_present_pte(&iter)) {
+		/*
+		 * The gfn range should be handled at the largest granularity
+		 * possible; however, since the functions which handle changed
+		 * PTEs (and free child PTs) will not yield, zapping an
+		 * entry with too many child PTEs can lead to scheduler
+		 * problems. To avoid this, only zap
+		 * PTEs at PDPE level and lower. The root level entries will be
+		 * zapped and the high level page table pages freed on VM
+		 * teardown.
+		 */
+		if ((iter.pte_gfn_start < start ||
+		     iter.pte_gfn_end > end ||
+		     iter.level > PT_PDPE_LEVEL) &&
+		    !is_last_spte(iter.old_pte, iter.level))
+			continue;
+
+		/*
+		 * If the compare / exchange succeeds, then we will continue on
+		 * to the next pte. If it fails, the next iteration will repeat
+		 * the current pte. We'll handle both cases in the same way, so
+		 * we don't need to check the result here.
+		 */
+		direct_walk_iterator_set_pte(&iter, 0);
+	}
+	return direct_walk_iterator_end_traversal(&iter);
+}
+
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 			  unsigned long data,
 			  int (*handler)(struct kvm *kvm,
@@ -6645,11 +6680,26 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
  */
 static void kvm_mmu_zap_all_fast(struct kvm *kvm)
 {
+	int i;
+
 	lockdep_assert_held(&kvm->slots_lock);
 
 	write_lock(&kvm->mmu_lock);
 	trace_kvm_mmu_zap_all_fast(kvm);
 
+	/* Zap all direct MMU PTEs slowly */
+	if (kvm->arch.direct_mmu_enabled) {
+		for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+			zap_direct_gfn_range(kvm, i, 0, ~0ULL,
+					MMU_WRITE_LOCK | MMU_LOCK_MAY_RESCHED);
+	}
+
+	if (kvm->arch.pure_direct_mmu) {
+		kvm_flush_remote_tlbs(kvm);
+		write_unlock(&kvm->mmu_lock);
+		return;
+	}
+
 	/*
 	 * Toggle mmu_valid_gen between '0' and '1'.  Because slots_lock is
 	 * held for the entire duration of zapping obsolete pages, it's
@@ -6888,8 +6938,21 @@ void kvm_mmu_zap_all(struct kvm *kvm)
 	struct kvm_mmu_page *sp, *node;
 	LIST_HEAD(invalid_list);
 	int ign;
+	int i;
 
 	write_lock(&kvm->mmu_lock);
+	if (kvm->arch.direct_mmu_enabled) {
+		for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+			zap_direct_gfn_range(kvm, i, 0, ~0ULL,
+					MMU_WRITE_LOCK | MMU_LOCK_MAY_RESCHED);
+		kvm_flush_remote_tlbs(kvm);
+	}
+
+	if (kvm->arch.pure_direct_mmu) {
+		write_unlock(&kvm->mmu_lock);
+		return;
+	}
+
 restart:
 	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
 		if (sp->role.invalid && sp->root_count)
-- 
2.23.0.444.g18eeb5a265-goog