[RFC PATCH 4/4] KVM: selective write protection using dirty bitmap

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Lai Jiangshan once tried to rewrite kvm_mmu_slot_remove_write_access() using
rmap: "kvm: rework remove-write-access for a slot"
      http://www.spinics.net/lists/kvm/msg35871.html

One problem pointed out there was that this approach might hurt cache locality
and make things slow down.

But if we restrict the story to dirty logging, we notice that only a small
portion of the pages actually needs to be write protected.

So this patch uses his approach with a small modification: the switched-out
dirty bitmap is used as a hint to restrict the rmap traversal.

We can also use this to selectively write protect pages to reduce unwanted page
faults in the future.

Conditions under which this hack has an advantage:
 - few dirty pages compared to shadow pages
 - under reasonable, not pathological, workloads

  Note that slots for frame buffers are also affected by the total number of
  shadow pages in the original implementation. Actually, I observed that the
  larger the RAM became, the more time kvm_mmu_slot_remove_write_access() took
  for frame buffer updates. This problem can be solved by our approach.

  Also, in usual workloads, live migration does not see so many dirty
  pages that this approach becomes ineffective.

Performance gain:
  During x11perf, in the condition of nr_dirty_pages/npages = 375/4096,
  our method was 20 times faster than the original.

  Live migration also got a similar improvement, and the effect for low
  workloads was more significant.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@xxxxxxxxxxxxx>
Cc: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |    2 +
 arch/x86/kvm/mmu.c              |   42 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              |   37 ++++++++++++++++++++++++++++++++-
 3 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b04c0fa..bc72c0d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -617,6 +617,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
+void kvm_mmu_slot_remove_write_access_mask(struct kvm *kvm,
+		struct kvm_memory_slot *slot, unsigned long *dirty_bitmap);
 void kvm_mmu_zap_all(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2139309..978e806 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3456,6 +3456,48 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 	kvm_flush_remote_tlbs(kvm);
 }
 
+static void remove_write_access_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+	u64 *spte = rmap_next(kvm, rmapp, NULL);
+
+	while (spte) {
+		update_spte(spte, *spte & ~PT_WRITABLE_MASK);
+		spte = rmap_next(kvm, rmapp, spte);
+	}
+}
+
+/*
+ * Write protect the pages marked dirty in a given bitmap.
+ */
+void kvm_mmu_slot_remove_write_access_mask(struct kvm *kvm,
+					   struct kvm_memory_slot *slot,
+					   unsigned long *dirty_bitmap)
+{
+	int i;
+	gfn_t gfn_offset;
+	unsigned long idx;
+	long last_idx[KVM_NR_PAGE_SIZES - 1];
+
+	for (i = 0; i < (KVM_NR_PAGE_SIZES - 1); ++i)
+		last_idx[i] = -1;
+
+	for_each_set_bit(gfn_offset, dirty_bitmap, slot->npages) {
+		remove_write_access_rmapp(kvm, &slot->rmap[gfn_offset]);
+
+		for (i = 0; i < (KVM_NR_PAGE_SIZES - 1); ++i) {
+			idx = lpage_idx(slot->base_gfn + gfn_offset,
+					slot->base_gfn, PT_DIRECTORY_LEVEL + i);
+			if (idx == last_idx[i])
+				continue;
+
+			remove_write_access_rmapp(kvm,
+					&slot->lpage_info[i][idx].rmap_pde);
+			last_idx[i] = idx;
+		}
+	}
+	kvm_flush_remote_tlbs(kvm);
+}
+
 void kvm_mmu_zap_all(struct kvm *kvm)
 {
 	struct kvm_mmu_page *sp, *node;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e9cf381..222af5e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3193,6 +3193,32 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 	return 0;
 }
 
+enum kvm_dirty_level {
+	KVM_DIRTY_LEVEL_NOTHING,
+	KVM_DIRTY_LEVEL_LIGHT,
+	KVM_DIRTY_LEVEL_HEAVY
+};
+
+/*
+ * Decide which write protection functions we should use.
+ *
+ * Key factors:
+ *  - number of dirty pages
+ *  - number of shadow pages
+ *  - direct mode or shadow mode
+ */
+static enum kvm_dirty_level dirty_level_memslot(struct kvm_memory_slot *memslot)
+{
+	if (!memslot->nr_dirty_pages)
+		return KVM_DIRTY_LEVEL_NOTHING;
+
+	if ((memslot->nr_dirty_pages < 2048) ||
+	    (memslot->nr_dirty_pages < memslot->npages / 64))
+		return KVM_DIRTY_LEVEL_LIGHT;
+
+	return KVM_DIRTY_LEVEL_HEAVY;
+}
+
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
@@ -3202,6 +3228,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	int r;
 	struct kvm_memory_slot *memslot;
 	unsigned long n;
+	enum kvm_dirty_level dirty_level;
 
 	mutex_lock(&kvm->slots_lock);
 
@@ -3217,7 +3244,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	n = kvm_dirty_bitmap_bytes(memslot);
 
 	/* If nothing is dirty, don't bother messing with page tables. */
-	if (memslot->nr_dirty_pages) {
+	dirty_level = dirty_level_memslot(memslot);
+	if (dirty_level != KVM_DIRTY_LEVEL_NOTHING) {
 		struct kvm_memslots *slots, *old_slots;
 		unsigned long *dirty_bitmap;
 
@@ -3242,7 +3270,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 		kfree(old_slots);
 
 		spin_lock(&kvm->mmu_lock);
-		kvm_mmu_slot_remove_write_access(kvm, log->slot);
+		if (dirty_level == KVM_DIRTY_LEVEL_HEAVY)
+			kvm_mmu_slot_remove_write_access(kvm, log->slot);
+		else
+			kvm_mmu_slot_remove_write_access_mask(kvm,
+						&slots->memslots[log->slot],
+						dirty_bitmap);
 		spin_unlock(&kvm->mmu_lock);
 
 		r = -EFAULT;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux