[PATCH v1 11/13] KVM: x86/mmu: Split huge pages during CLEAR_DIRTY_LOG

When using initially-all-set, huge pages are not write-protected when
dirty logging is enabled on the memslot. Instead, they are
write-protected once userspace invokes CLEAR_DIRTY_LOG for the first
time, and only for the specific sub-region being cleared.
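
For reference, userspace drives this path with the KVM_CLEAR_DIRTY_LOG
ioctl once KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled. A minimal
sketch, not part of this series, of clearing a 256-page sub-region of
a slot (the helper name and fixed size are illustrative):

  #include <linux/kvm.h>
  #include <string.h>
  #include <sys/ioctl.h>

  /*
   * Illustrative helper, not part of this series: clear (and thus
   * write-protect) pages [first, first + 256) of the given memslot.
   * Per the KVM API, first_page and num_pages must be multiples of
   * 64; set bits in dirty_bitmap select which pages to clear.
   */
  static int clear_dirty_range(int vm_fd, __u32 slot, __u64 first)
  {
  	__u64 bitmap[4];		/* 4 x 64 bits = 256 pages */
  	struct kvm_clear_dirty_log clear = {
  		.slot = slot,
  		.first_page = first,
  		.num_pages = 256,
  		.dirty_bitmap = bitmap,
  	};

  	memset(bitmap, 0xff, sizeof(bitmap));	/* clear every page */
  	return ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clear);
  }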

Enhance CLEAR_DIRTY_LOG to also try to split huge pages prior to
write-protecting them, which avoids causing write-protection faults on
vCPU threads. This also allows userspace to smear the cost of huge page
splitting across multiple ioctls, rather than splitting the entire
memslot when dirty logging is first enabled, as is done when not using
initially-all-set.
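
For example, a VMM could bound the splitting work done per ioctl by
clearing a slot in fixed-size chunks; a hypothetical sketch building
on the clear_dirty_range() helper above (the chunk size is arbitrary):

  /*
   * Hypothetical sketch: walk a whole slot 256 pages at a time so
   * that huge page splitting and write protection are spread across
   * many ioctls. Assumes slot_pages is a multiple of 256 and reuses
   * the illustrative clear_dirty_range() helper sketched earlier.
   */
  static int clear_slot_in_chunks(int vm_fd, __u32 slot, __u64 slot_pages)
  {
  	__u64 first;
  	int ret;

  	for (first = 0; first < slot_pages; first += 256) {
  		ret = clear_dirty_range(vm_fd, slot, first);
  		if (ret)
  			return ret;
  	}
  	return 0;
  }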

Signed-off-by: David Matlack <dmatlack@xxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  4 ++++
 arch/x86/kvm/mmu/mmu.c          | 36 +++++++++++++++++++++++++++------
 arch/x86/kvm/x86.c              |  2 +-
 arch/x86/kvm/x86.h              |  2 ++
 4 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4a507109e886..3e537e261562 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1576,6 +1576,10 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
 				       const struct kvm_memory_slot *memslot,
 				       int target_level);
+void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
+				  const struct kvm_memory_slot *memslot,
+				  u64 start, u64 end,
+				  int target_level);
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c9e5fe290714..55640d73df5a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1362,6 +1362,20 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 		gfn_t start = slot->base_gfn + gfn_offset + __ffs(mask);
 		gfn_t end = slot->base_gfn + gfn_offset + __fls(mask);
 
+		/*
+		 * Try to proactively split any huge pages down to 4KB so that
+		 * vCPUs don't have to take write-protection faults.
+		 *
+		 * Drop the MMU lock since huge page splitting uses its own
+		 * locking scheme and does not require the write lock in all
+		 * cases.
+		 */
+		if (READ_ONCE(eagerly_split_huge_pages_for_dirty_logging)) {
+			write_unlock(&kvm->mmu_lock);
+			kvm_mmu_try_split_huge_pages(kvm, slot, start, end, PG_LEVEL_4K);
+			write_lock(&kvm->mmu_lock);
+		}
+
 		kvm_mmu_slot_gfn_write_protect(kvm, slot, start, PG_LEVEL_2M);
 
 		/* Cross two large pages? */
@@ -5811,13 +5825,11 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 		kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
-void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
-				       const struct kvm_memory_slot *memslot,
-				       int target_level)
+void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
+				   const struct kvm_memory_slot *memslot,
+				   u64 start, u64 end,
+				   int target_level)
 {
-	u64 start = memslot->base_gfn;
-	u64 end = start + memslot->npages;
-
 	if (is_tdp_mmu_enabled(kvm)) {
 		read_lock(&kvm->mmu_lock);
 		kvm_tdp_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level);
@@ -5825,6 +5837,18 @@ void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
 	}
 }
 }
 
+void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
+					const struct kvm_memory_slot *memslot,
+					int target_level)
+{
+	u64 start, end;
+
+	start = memslot->base_gfn;
+	end = start + memslot->npages;
+
+	kvm_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level);
+}
+
 static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
 					 struct kvm_rmap_head *rmap_head,
 					 const struct kvm_memory_slot *slot)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fb5592bf2eee..e27a3d6e3978 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -187,7 +187,7 @@ module_param(force_emulation_prefix, bool, S_IRUGO);
 int __read_mostly pi_inject_timer = -1;
 module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
 
-static bool __read_mostly eagerly_split_huge_pages_for_dirty_logging = true;
+bool __read_mostly eagerly_split_huge_pages_for_dirty_logging = true;
 module_param(eagerly_split_huge_pages_for_dirty_logging, bool, 0644);
 
 /*
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 4abcd8d9836d..825e47451875 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -352,6 +352,8 @@ extern int pi_inject_timer;
 
 extern bool report_ignored_msrs;
 
+extern bool eagerly_split_huge_pages_for_dirty_logging;
+
 static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
 {
 	return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
-- 
2.34.1.173.g76aa8bc2d0-goog