[PATCH RFC 5/5] KVM: This is the main part of the "moving dirty bitmaps to user space"

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



With this patch, bitmap allocation is replaced with do_mmap() and
bitmap manipulation is replaced with *_user() functions.

Note that this does not change the APIs between kernel and user space.
To get more advantage from this hack, we need to add a new interface
for triggering the bitmap switch and getting the bitmap addresses: the
addresses are in user space and we can export them to qemu.

TODO:
1. We want to use copy_in_user() for 32bit case too.
   Note that this is only for the compatibility issue: in the future,
   we hope, qemu will not need to use this ioctl.
2. We have to implement test_bit_user() to avoid extra set_bit.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@xxxxxxxxxxxxx>
Signed-off-by: Fernando Luis Vazquez Cao <fernando@xxxxxxxxxxxxx>
---
 arch/x86/kvm/x86.c       |  118 +++++++++++++++++++++++++++++++++++++--------
 include/linux/kvm_host.h |    4 ++
 virt/kvm/kvm_main.c      |   30 +++++++++++-
 3 files changed, 130 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 450ecfe..995b970 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2642,16 +2642,99 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 	return 0;
 }
 
+/* Map and clear both user-space dirty bitmaps of a slot; 0 or -errno. */
+int kvm_arch_create_dirty_bitmap(struct kvm_memory_slot *memslot)
+{
+	unsigned long user_addr1, user_addr2;
+	int dirty_bytes = kvm_dirty_bitmap_bytes(memslot);
+
+	down_write(&current->mm->mmap_sem);
+	user_addr1 = do_mmap(NULL, 0, dirty_bytes, PROT_READ | PROT_WRITE,
+			     MAP_PRIVATE | MAP_ANONYMOUS, 0);
+	if (IS_ERR((void *)user_addr1)) {
+		up_write(&current->mm->mmap_sem);
+		return PTR_ERR((void *)user_addr1);
+	}
+	user_addr2 = do_mmap(NULL, 0, dirty_bytes, PROT_READ | PROT_WRITE,
+			     MAP_PRIVATE | MAP_ANONYMOUS, 0);
+	if (IS_ERR((void *)user_addr2)) {
+		do_munmap(current->mm, user_addr1, dirty_bytes);
+		up_write(&current->mm->mmap_sem);
+		return PTR_ERR((void *)user_addr2);
+	}
+	up_write(&current->mm->mmap_sem);
+	/* clear_user() returns the number of bytes it could not clear. */
+	memslot->dirty_bitmap = (unsigned long __user *)user_addr1;
+	memslot->dirty_bitmap_old = (unsigned long __user *)user_addr2;
+	if (clear_user(memslot->dirty_bitmap, dirty_bytes) ||
+	    clear_user(memslot->dirty_bitmap_old, dirty_bytes)) {
+		kvm_arch_destroy_dirty_bitmap(memslot);	/* unmaps both again */
+		return -EFAULT;
+	}
+	return 0;
+}
+
+/* Unmap both user-space dirty bitmaps of a slot and reset its pointers. */
+void kvm_arch_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
+{
+	struct mm_struct *mm = current->mm;
+	int bytes = kvm_dirty_bitmap_bytes(memslot);
+
+	if (memslot->dirty_bitmap) {
+		down_write(&mm->mmap_sem);
+		do_munmap(mm, (unsigned long)memslot->dirty_bitmap, bytes);
+		do_munmap(mm, (unsigned long)memslot->dirty_bitmap_old, bytes);
+		up_write(&mm->mmap_sem);
+		memslot->dirty_bitmap = NULL;
+		memslot->dirty_bitmap_old = NULL;
+	}
+}
+
+/*
+ * Copy n bytes of dirty bitmap from one user-space buffer to another.
+ * On 32bit the data is bounced through a temporary vmalloc() buffer.
+ * The user-copy helpers return the number of bytes left uncopied, never
+ * a negative value, so any non-zero result is treated as a fault.
+ * Returns 0 on success, -EFAULT on a faulting copy, or -ENOMEM when the
+ * bounce buffer cannot be allocated.
+ */
+static int kvm_copy_dirty_bitmap(unsigned long __user *to,
+				 const unsigned long __user *from, int n)
+{
+#ifdef CONFIG_X86_64
+	if (copy_in_user(to, from, n)) {
+		printk(KERN_WARNING "%s: copy_in_user failed\n", __func__);
+		return -EFAULT;
+	}
+	return 0;
+#else
+	int ret = -EFAULT;
+	void *p = vmalloc(n);
+
+	if (!p)
+		return -ENOMEM;
+	if (copy_from_user(p, from, n))
+		printk(KERN_WARNING "%s: copy_from_user failed\n", __func__);
+	else if (copy_to_user(to, p, n))
+		printk(KERN_WARNING "%s: copy_to_user failed\n", __func__);
+	else
+		ret = 0;
+
+	vfree(p);
+	return ret;
+#endif
+}
+
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 				      struct kvm_dirty_log *log)
 {
-	int r, n, i;
+	int r, n;
 	struct kvm_memory_slot *memslot;
-	unsigned long is_dirty = 0;
-	unsigned long *dirty_bitmap = NULL;
+	unsigned long __user *dirty_bitmap;
+	unsigned long __user *dirty_bitmap_old;
 
 	mutex_lock(&kvm->slots_lock);
 
@@ -2664,44 +2747,37 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	if (!memslot->dirty_bitmap)
 		goto out;
 
-	n = kvm_dirty_bitmap_bytes(memslot);
-
-	r = -ENOMEM;
-	dirty_bitmap = vmalloc(n);
-	if (!dirty_bitmap)
-		goto out;
-	memset(dirty_bitmap, 0, n);
+	dirty_bitmap = memslot->dirty_bitmap;
+	dirty_bitmap_old = memslot->dirty_bitmap_old;
 
-	for (i = 0; !is_dirty && i < n/sizeof(long); i++)
-		is_dirty = memslot->dirty_bitmap[i];
+	n = kvm_dirty_bitmap_bytes(memslot);
+	clear_user(dirty_bitmap_old, n);
 
 	/* If nothing is dirty, don't bother messing with page tables. */
-	if (is_dirty) {
+	if (memslot->is_dirty) {
 		struct kvm_memslots *slots, *old_slots;
 
 		spin_lock(&kvm->mmu_lock);
 		kvm_mmu_slot_remove_write_access(kvm, log->slot);
 		spin_unlock(&kvm->mmu_lock);
 
+		r = -ENOMEM;
 		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 		if (!slots)
-			goto out_free;
+			goto out;
 
 		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
-		slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
+		slots->memslots[log->slot].dirty_bitmap = dirty_bitmap_old;
+		slots->memslots[log->slot].dirty_bitmap_old = dirty_bitmap;
+		slots->memslots[log->slot].is_dirty = false;
 
 		old_slots = kvm->memslots;
 		rcu_assign_pointer(kvm->memslots, slots);
 		synchronize_srcu_expedited(&kvm->srcu);
-		dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
 		kfree(old_slots);
 	}
 
-	r = 0;
-	if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
-		r = -EFAULT;
-out_free:
-	vfree(dirty_bitmap);
+	r = kvm_copy_dirty_bitmap(log->dirty_bitmap, dirty_bitmap, n);
 out:
 	mutex_unlock(&kvm->slots_lock);
 	return r;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 07092d6..834812f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -276,6 +276,10 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
 				struct kvm_memory_slot old,
 				int user_alloc);
+#ifdef __KVM_HAVE_USER_DIRTYBITMAP
+int kvm_arch_create_dirty_bitmap(struct kvm_memory_slot *memslot);
+void kvm_arch_destroy_dirty_bitmap(struct kvm_memory_slot *memslot);
+#endif
 void kvm_disable_largepages(void);
 void kvm_arch_flush_shadow(struct kvm *kvm);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f919bd1..038a677 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -433,8 +433,12 @@ out_err_nodisable:
 
 static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
+#ifdef __KVM_HAVE_USER_DIRTYBITMAP
+	kvm_arch_destroy_dirty_bitmap(memslot);
+#else
 	vfree(memslot->dirty_bitmap);
 	memslot->dirty_bitmap = NULL;
+#endif
 }
 
 /*
@@ -463,13 +467,26 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 	free->rmap = NULL;
 }
 
+/*
+ * In the case of vm destruction the dirty bitmaps are not munmap'ed by
+ * us: only the user-space addresses held in the slot are reset.
+ */
+static void kvm_pre_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
+{
+#ifdef __KVM_HAVE_USER_DIRTYBITMAP
+	memslot->dirty_bitmap_old = memslot->dirty_bitmap = NULL;
+#endif
+}
+
 void kvm_free_physmem(struct kvm *kvm)
 {
 	int i;
 	struct kvm_memslots *slots = kvm->memslots;
 
-	for (i = 0; i < slots->nmemslots; ++i)
+	for (i = 0; i < slots->nmemslots; ++i) {
+		kvm_pre_destroy_dirty_bitmap(&slots->memslots[i]);
 		kvm_free_physmem_slot(&slots->memslots[i], NULL);
+	}
 
 	kfree(kvm->memslots);
 }
@@ -523,6 +540,9 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
 
 static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
+#ifdef __KVM_HAVE_USER_DIRTYBITMAP
+	return kvm_arch_create_dirty_bitmap(memslot);
+#else
 	int dirty_bytes = kvm_dirty_bitmap_bytes(memslot);
 
 	memslot->dirty_bitmap = vmalloc(dirty_bytes);
@@ -530,6 +550,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 		return -ENOMEM;
 
 	memset(memslot->dirty_bitmap, 0, dirty_bytes);
+#endif
 	return 0;
 }
 
@@ -1197,9 +1218,16 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 
+#ifdef __KVM_HAVE_USER_DIRTYBITMAP
+		if (set_bit_user(rel_gfn, memslot->dirty_bitmap) < 0)
+			printk(KERN_WARNING "%s: set_bit_user failed\n", __func__);
+
+		memslot->is_dirty = true;
+#else
 		/* avoid RMW */
 		if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap))
 			generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
+#endif
 	}
 }
 
-- 
1.6.3.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux