[PATCH RFC v2 5/6] KVM: moving dirty bitmaps to user space

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



We move dirty bitmaps to user space.

 - Allocation and destruction: we use do_mmap() and do_munmap().
   The new bitmap space is twice longer than the original one and we
   use the additional space for double buffering: this makes it
   possible to update the active bitmap while letting the user space
   read the other one safely.

 - Bitmap manipulations: we replace all functions which access dirty
   bitmaps to *_user() functions. Note that some of these should be
   optimized later.

 - For ia64: moving the dirty bitmaps of memory slots does not effect
   much to ia64 because it's using a different space to store bitmaps
   which is directly updated: all we have to change are sync and get
   of dirty log, so we don't need set_bit_user like function for ia64.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@xxxxxxxxxxxxx>
Signed-off-by: Fernando Luis Vazquez Cao <fernando@xxxxxxxxxxxxx>
---
 arch/ia64/kvm/kvm-ia64.c  |   12 ++++-
 arch/powerpc/kvm/book3s.c |    2 +-
 arch/x86/kvm/x86.c        |   24 +++++-----
 include/linux/kvm_host.h  |    5 +-
 virt/kvm/kvm_main.c       |  101 ++++++++++++++++++++++++++++++++++++++++-----
 5 files changed, 116 insertions(+), 28 deletions(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index d60dafe..c3f0b70 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1823,11 +1823,19 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
 	n = kvm_dirty_bitmap_bytes(memslot);
 	base = memslot->base_gfn / BITS_PER_LONG;
 
+	r = -EFAULT;
+	if (!access_ok(VERIFY_WRITE, memslot->dirty_bitmap, n))
+		goto out;
+
 	for (i = 0; i < n/sizeof(long); ++i) {
 		if (dirty_bitmap[base + i])
 			memslot->is_dirty = true;
 
-		memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
+		if (__put_user(dirty_bitmap[base + i],
+			       &memslot->dirty_bitmap[i])) {
+			r = -EFAULT;
+			goto out;
+		}
 		dirty_bitmap[base + i] = 0;
 	}
 	r = 0;
@@ -1858,7 +1866,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	if (memslot->is_dirty) {
 		kvm_flush_remote_tlbs(kvm);
 		n = kvm_dirty_bitmap_bytes(memslot);
-		memset(memslot->dirty_bitmap, 0, n);
+		clear_user(memslot->dirty_bitmap, n);
 		memslot->is_dirty = false;
 	}
 	r = 0;
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 53b45cf..b074e19 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -1137,7 +1137,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 			kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
 
 		n = kvm_dirty_bitmap_bytes(memslot);
-		memset(memslot->dirty_bitmap, 0, n);
+		clear_user(memslot->dirty_bitmap, n);
 		memslot->is_dirty = false;
 	}
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6f0b706..ad55353 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2727,7 +2727,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	int r;
 	struct kvm_memory_slot *memslot;
 	unsigned long n;
-	unsigned long *dirty_bitmap = NULL;
+	unsigned long __user *dirty_bitmap;
+	unsigned long __user *dirty_bitmap_old;
 
 	mutex_lock(&kvm->slots_lock);
 
@@ -2742,11 +2743,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 
 	n = kvm_dirty_bitmap_bytes(memslot);
 
-	r = -ENOMEM;
-	dirty_bitmap = vmalloc(n);
-	if (!dirty_bitmap)
-		goto out;
-	memset(dirty_bitmap, 0, n);
+	dirty_bitmap = memslot->dirty_bitmap;
+	dirty_bitmap_old = memslot->dirty_bitmap_old;
+	clear_user(dirty_bitmap_old, n);
 
 	/* If nothing is dirty, don't bother messing with page tables. */
 	if (memslot->is_dirty) {
@@ -2756,24 +2755,25 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 		kvm_mmu_slot_remove_write_access(kvm, log->slot);
 		spin_unlock(&kvm->mmu_lock);
 
+		r = -ENOMEM;
 		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 		if (!slots)
-			goto out_free;
+			goto out;
 
 		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
-		slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
+		slots->memslots[log->slot].dirty_bitmap = dirty_bitmap_old;
+		slots->memslots[log->slot].dirty_bitmap_old = dirty_bitmap;
 		slots->memslots[log->slot].is_dirty = false;
 
 		old_slots = kvm->memslots;
 		rcu_assign_pointer(kvm->memslots, slots);
 		synchronize_srcu_expedited(&kvm->srcu);
-		dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
 		kfree(old_slots);
+
+		dirty_bitmap_old = dirty_bitmap;
 	}
 
-	r = kvm_copy_dirty_bitmap(log->dirty_bitmap, dirty_bitmap, n);
-out_free:
-	vfree(dirty_bitmap);
+	r = kvm_copy_dirty_bitmap(log->dirty_bitmap, dirty_bitmap_old, n);
 out:
 	mutex_unlock(&kvm->slots_lock);
 	return r;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 42b7161..d4d217a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -116,7 +116,8 @@ struct kvm_memory_slot {
 	unsigned long npages;
 	unsigned long flags;
 	unsigned long *rmap;
-	unsigned long *dirty_bitmap;
+	unsigned long __user *dirty_bitmap;
+	unsigned long __user *dirty_bitmap_old;
 	bool is_dirty;
 	struct {
 		unsigned long rmap_pde;
@@ -331,7 +332,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 int kvm_dev_ioctl_check_extension(long ext);
 
 int kvm_copy_dirty_bitmap(unsigned long __user *to,
-			  const unsigned long *from,
+			  const unsigned long __user *from,
 			  unsigned long bytes);
 int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log);
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 66c4daf..9323639 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -433,8 +433,20 @@ out_err_nodisable:
 
 static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
-	vfree(memslot->dirty_bitmap);
+	unsigned long user_addr;
+	unsigned long n = kvm_dirty_bitmap_bytes(memslot);
+
+	if (!memslot->dirty_bitmap)
+		return;
+
+	user_addr = min((unsigned long)memslot->dirty_bitmap,
+			(unsigned long)memslot->dirty_bitmap_old);
+	down_write(&current->mm->mmap_sem);
+	do_munmap(current->mm, user_addr, 2 * n);
+	up_write(&current->mm->mmap_sem);
+
 	memslot->dirty_bitmap = NULL;
+	memslot->dirty_bitmap_old = NULL;
 }
 
 /*
@@ -468,8 +480,12 @@ void kvm_free_physmem(struct kvm *kvm)
 	int i;
 	struct kvm_memslots *slots = kvm->memslots;
 
-	for (i = 0; i < slots->nmemslots; ++i)
+	for (i = 0; i < slots->nmemslots; ++i) {
+		/* We don't munmap dirty bitmaps by ourselves. */
+		slots->memslots[i].dirty_bitmap = NULL;
+		slots->memslots[i].dirty_bitmap_old = NULL;
 		kvm_free_physmem_slot(&slots->memslots[i], NULL);
+	}
 
 	kfree(kvm->memslots);
 }
@@ -523,13 +539,22 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
 
 static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
-	unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(memslot);
+	unsigned long user_addr;
+	unsigned long n = kvm_dirty_bitmap_bytes(memslot);
 
-	memslot->dirty_bitmap = vmalloc(dirty_bytes);
-	if (!memslot->dirty_bitmap)
-		return -ENOMEM;
+	down_write(&current->mm->mmap_sem);
+	user_addr = do_mmap(NULL, 0, 2 * n,
+			    PROT_READ | PROT_WRITE,
+			    MAP_PRIVATE | MAP_ANONYMOUS, 0);
+	up_write(&current->mm->mmap_sem);
+
+	if (IS_ERR((void *)user_addr))
+		return PTR_ERR((void *)user_addr);
+
+	memslot->dirty_bitmap = (unsigned long __user *)user_addr;
+	memslot->dirty_bitmap_old = (unsigned long __user *)(user_addr + n);
+	clear_user(memslot->dirty_bitmap, 2 * n);
 
-	memset(memslot->dirty_bitmap, 0, dirty_bytes);
 	return 0;
 }
 
@@ -778,13 +803,45 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 }
 
 int kvm_copy_dirty_bitmap(unsigned long __user *to,
-			  const unsigned long *from,
+			  const unsigned long __user *from,
 			  unsigned long bytes)
 {
-	if (copy_to_user(to, from, bytes))
+#if defined(CONFIG_X86_64) || defined(CONFIG_PPC64) || defined(CONFIG_IA64)
+	if (copy_in_user(to, from, bytes)) {
+		printk(KERN_WARNING "%s: copy_in_user failed.\n", __func__);
 		return -EFAULT;
+	}
+	return 0;
+#else
+	int num, bufbytes;
+	unsigned long buf[32];
 
+	if (!access_ok(VERIFY_READ, from, bytes) ||
+	    !access_ok(VERIFY_WRITE, to, bytes)) {
+		goto out_fault;
+	}
+
+	bufbytes = sizeof(buf);
+	num = bufbytes / sizeof(buf[0]);
+
+	for (; bytes > bufbytes; bytes -= bufbytes, to += num, from += num) {
+		if (__copy_from_user(buf, from, bufbytes))
+			goto out_fault;
+		if (__copy_to_user(to, buf, bufbytes))
+			goto out_fault;
+	}
+	if (bytes > 0) {
+		if (__copy_from_user(buf, from, bytes))
+			goto out_fault;
+		if (__copy_to_user(to, buf, bytes))
+			goto out_fault;
+	}
 	return 0;
+
+out_fault:
+	printk(KERN_WARNING "%s: copy to(from) user failed.\n", __func__);
+	return -EFAULT;
+#endif
 }
 
 int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
@@ -1194,13 +1251,35 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest);
 
+/*
+ * Please use generic *_user bitops once they become available.
+ * Be careful setting the bit won't be done atomically.
+ */
 static int __mark_page_dirty(unsigned long nr,
-			     unsigned long *dirty_bitmap)
+			     unsigned long __user *dirty_bitmap)
 {
+	unsigned long user_addr;
+	u8 val;
+
 #ifdef __BIG_ENDIAN
 	nr = nr ^ BITOP_LE_SWIZZLE;
 #endif
-	__set_bit(nr, dirty_bitmap);
+	user_addr = (unsigned long)dirty_bitmap + nr / 8;
+	if (!access_ok(VERIFY_WRITE, user_addr, 1))
+		goto out_fault;
+
+	if (__get_user(val, (u8 __user *)user_addr))
+		goto out_fault;
+
+	val |= 1U << (nr % 8);
+	if (__put_user(val, (u8 __user *)user_addr))
+		goto out_fault;
+
+	return 0;
+
+out_fault:
+	printk(KERN_WARNING "%s: setting user bit failed.\n", __func__);
+	return -EFAULT;
 }
 
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
-- 
1.6.3.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux