With this patch, bitmap allocation is replaced with do_mmap() and bitmap manipulation is replaced with *_user() functions. Note that this does not change the APIs between kernel and user space. To take more advantage of this hack, we need to add a new interface for triggering the bitmap switch and getting the bitmap addresses: the addresses are in user space and we can export them to qemu. TODO: 1. We want to use copy_in_user() for the 32-bit case too. Note that this is only for compatibility: in the future, we hope, qemu will not need to use this ioctl. 2. We have to implement test_bit_user() to avoid the extra set_bit. Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@xxxxxxxxxxxxx> Signed-off-by: Fernando Luis Vazquez Cao <fernando@xxxxxxxxxxxxx> --- arch/x86/kvm/x86.c | 118 +++++++++++++++++++++++++++++++++++++-------- include/linux/kvm_host.h | 4 ++ virt/kvm/kvm_main.c | 30 +++++++++++- 3 files changed, 130 insertions(+), 22 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 450ecfe..995b970 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2642,16 +2642,99 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, return 0; } +int kvm_arch_create_dirty_bitmap(struct kvm_memory_slot *memslot) +{ + unsigned long user_addr1; + unsigned long user_addr2; + int dirty_bytes = kvm_dirty_bitmap_bytes(memslot); + + down_write(&current->mm->mmap_sem); + user_addr1 = do_mmap(NULL, 0, dirty_bytes, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0); + if (IS_ERR((void *)user_addr1)) { + up_write(&current->mm->mmap_sem); + return PTR_ERR((void *)user_addr1); + } + user_addr2 = do_mmap(NULL, 0, dirty_bytes, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0); + if (IS_ERR((void *)user_addr2)) { + do_munmap(current->mm, user_addr1, dirty_bytes); + up_write(&current->mm->mmap_sem); + return PTR_ERR((void *)user_addr2); + } + up_write(&current->mm->mmap_sem); + + memslot->dirty_bitmap = (unsigned long __user *)user_addr1; + memslot->dirty_bitmap_old = (unsigned
long __user *)user_addr2; + clear_user(memslot->dirty_bitmap, dirty_bytes); + clear_user(memslot->dirty_bitmap_old, dirty_bytes); + + return 0; +} + +void kvm_arch_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) +{ + int n = kvm_dirty_bitmap_bytes(memslot); + + if (!memslot->dirty_bitmap) + return; + + down_write(&current->mm->mmap_sem); + do_munmap(current->mm, (unsigned long)memslot->dirty_bitmap, n); + do_munmap(current->mm, (unsigned long)memslot->dirty_bitmap_old, n); + up_write(&current->mm->mmap_sem); + + memslot->dirty_bitmap = NULL; + memslot->dirty_bitmap_old = NULL; +} + +static int kvm_copy_dirty_bitmap(unsigned long __user *to, + const unsigned long __user *from, int n) +{ +#ifdef CONFIG_X86_64 + if (copy_in_user(to, from, n) < 0) { + printk(KERN_WARNING "%s: copy_in_user failed\n", __func__); + return -EFAULT; + } + return 0; +#else + int ret = 0; + void *p = vmalloc(n); + + if (!p) { + ret = -ENOMEM; + goto out; + } + if (copy_from_user(p, from, n) < 0) { + printk(KERN_WARNING "%s: copy_from_user failed\n", __func__); + ret = -EFAULT; + goto out_free; + } + if (copy_to_user(to, p, n) < 0) { + printk(KERN_WARNING "%s: copy_to_user failed\n", __func__); + ret = -EFAULT; + goto out_free; + } + +out_free: + vfree(p); +out: + return ret; +#endif +} + /* * Get (and clear) the dirty memory log for a memory slot.
*/ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { - int r, n, i; + int r, n; struct kvm_memory_slot *memslot; - unsigned long is_dirty = 0; - unsigned long *dirty_bitmap = NULL; + unsigned long __user *dirty_bitmap; + unsigned long __user *dirty_bitmap_old; mutex_lock(&kvm->slots_lock); @@ -2664,44 +2747,37 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; - n = kvm_dirty_bitmap_bytes(memslot); - - r = -ENOMEM; - dirty_bitmap = vmalloc(n); - if (!dirty_bitmap) - goto out; - memset(dirty_bitmap, 0, n); + dirty_bitmap = memslot->dirty_bitmap; + dirty_bitmap_old = memslot->dirty_bitmap_old; - for (i = 0; !is_dirty && i < n/sizeof(long); i++) - is_dirty = memslot->dirty_bitmap[i]; + n = kvm_dirty_bitmap_bytes(memslot); + clear_user(dirty_bitmap_old, n); /* If nothing is dirty, don't bother messing with page tables. */ - if (is_dirty) { + if (memslot->is_dirty) { struct kvm_memslots *slots, *old_slots; spin_lock(&kvm->mmu_lock); kvm_mmu_slot_remove_write_access(kvm, log->slot); spin_unlock(&kvm->mmu_lock); + r = -ENOMEM; slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!slots) - goto out_free; + goto out; memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); - slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; + slots->memslots[log->slot].dirty_bitmap = dirty_bitmap_old; + slots->memslots[log->slot].dirty_bitmap_old = dirty_bitmap; + slots->memslots[log->slot].is_dirty = false; old_slots = kvm->memslots; rcu_assign_pointer(kvm->memslots, slots); synchronize_srcu_expedited(&kvm->srcu); - dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; kfree(old_slots); } - r = 0; - if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) - r = -EFAULT; -out_free: - vfree(dirty_bitmap); + r = kvm_copy_dirty_bitmap(log->dirty_bitmap, dirty_bitmap, n); out: mutex_unlock(&kvm->slots_lock); return r; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 07092d6..834812f 100644 
--- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -276,6 +276,10 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, int user_alloc); +#ifdef __KVM_HAVE_USER_DIRTYBITMAP +int kvm_arch_create_dirty_bitmap(struct kvm_memory_slot *memslot); +void kvm_arch_destroy_dirty_bitmap(struct kvm_memory_slot *memslot); +#endif void kvm_disable_largepages(void); void kvm_arch_flush_shadow(struct kvm *kvm); gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f919bd1..038a677 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -433,8 +433,12 @@ out_err_nodisable: static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) { +#ifdef __KVM_HAVE_USER_DIRTYBITMAP + kvm_arch_destroy_dirty_bitmap(memslot); +#else vfree(memslot->dirty_bitmap); memslot->dirty_bitmap = NULL; +#endif } /* @@ -463,13 +467,26 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free, free->rmap = NULL; } +/* + * We don't munmap dirty bitmaps by ourselves in the case of vm destruction. 
+ */ +static void kvm_pre_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) +{ +#ifdef __KVM_HAVE_USER_DIRTYBITMAP + memslot->dirty_bitmap = NULL; + memslot->dirty_bitmap_old = NULL; +#endif +} + void kvm_free_physmem(struct kvm *kvm) { int i; struct kvm_memslots *slots = kvm->memslots; - for (i = 0; i < slots->nmemslots; ++i) + for (i = 0; i < slots->nmemslots; ++i) { + kvm_pre_destroy_dirty_bitmap(&slots->memslots[i]); kvm_free_physmem_slot(&slots->memslots[i], NULL); + } kfree(kvm->memslots); } @@ -523,6 +540,9 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) { +#ifdef __KVM_HAVE_USER_DIRTYBITMAP + return kvm_arch_create_dirty_bitmap(memslot); +#else int dirty_bytes = kvm_dirty_bitmap_bytes(memslot); memslot->dirty_bitmap = vmalloc(dirty_bytes); @@ -530,6 +550,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) return -ENOMEM; memset(memslot->dirty_bitmap, 0, dirty_bytes); +#endif return 0; } @@ -1197,9 +1218,16 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; +#ifdef __KVM_HAVE_USER_DIRTYBITMAP + if (set_bit_user(rel_gfn, memslot->dirty_bitmap) < 0) + printk(KERN_WARNING "%s: set_bit_user failed\n", __func__); + + memslot->is_dirty = true; +#else /* avoid RMW */ if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap)) generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); +#endif } } -- 1.6.3.3 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html