On Mon, 2011-01-24 at 08:44 -0700, Alex Williamson wrote: > I'll look at how we might be > able to allocate slots on demand. Thanks, Here's a first cut just to see if this looks agreeable. This allows the slot array to grow on demand. This works with current userspace, as well as userspace trivially modified to double KVMState.slots and hotplugging enough pci-assign devices to exceed the previous limit (w/ & w/o ept). Hopefully I got the rcu bits correct. Does this look like the right path? If so, I'll work on removing the fixed limit from userspace next. Thanks, Alex kvm: Allow memory slot array to grow on demand Remove fixed KVM_MEMORY_SLOTS limit, allowing the slot array to grow on demand. Private slots are now allocated at the front instead of the end. Only x86 seems to use private slots, so this is now zero for all other archs. The memslots pointer is already updated using rcu, so changing the size of the array when it's replaced is straightforward. x86 also keeps a bitmap of slots used by a kvm_mmu_page, which requires a shadow tlb flush whenever we increase the number of slots. This forces the pages to be rebuilt with the new bitmap size. 
Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx> --- arch/ia64/include/asm/kvm_host.h | 4 -- arch/ia64/kvm/kvm-ia64.c | 2 + arch/powerpc/include/asm/kvm_host.h | 3 -- arch/s390/include/asm/kvm_host.h | 3 -- arch/x86/include/asm/kvm_host.h | 3 +- arch/x86/include/asm/vmx.h | 6 ++- arch/x86/kvm/mmu.c | 7 +++- arch/x86/kvm/x86.c | 6 ++- include/linux/kvm_host.h | 7 +++- virt/kvm/kvm_main.c | 65 ++++++++++++++++++++++++----------- 10 files changed, 63 insertions(+), 43 deletions(-) diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 2689ee5..11d0ab2 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -23,10 +23,6 @@ #ifndef __ASM_KVM_HOST_H #define __ASM_KVM_HOST_H -#define KVM_MEMORY_SLOTS 32 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 4 - #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 /* define exit reasons from vmm to kvm*/ diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 70d224d..f1adda2 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1814,7 +1814,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, mutex_lock(&kvm->slots_lock); r = -EINVAL; - if (log->slot >= KVM_MEMORY_SLOTS) + if (log->slot >= kvm->memslots->nmemslots) goto out; memslot = &kvm->memslots->memslots[log->slot]; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bba3b9b..dc80057 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -29,9 +29,6 @@ #include <asm/kvm_asm.h> #define KVM_MAX_VCPUS 1 -#define KVM_MEMORY_SLOTS 32 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index cef7dbf..92a964c 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -20,9 +20,6 @@ 
#include <asm/cpu.h> #define KVM_MAX_VCPUS 64 -#define KVM_MEMORY_SLOTS 32 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 4 struct sca_entry { atomic_t scn; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ffd7f8d..df1382c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -27,7 +27,6 @@ #include <asm/msr-index.h> #define KVM_MAX_VCPUS 64 -#define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 @@ -207,7 +206,7 @@ struct kvm_mmu_page { * One bit set per slot which has memory * in this shadow page. */ - DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); + unsigned long *slot_bitmap; bool multimapped; /* More than one parent_pte? */ bool unsync; int root_count; /* Currently serving as active root */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 84471b8..7fd8c89 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -370,9 +370,9 @@ enum vmcs_field { #define AR_RESERVD_MASK 0xfffe0f00 -#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) -#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) -#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) +#define TSS_PRIVATE_MEMSLOT 0 +#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 1 +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 2 #define VMX_NR_VPIDS (1 << 16) #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ccacf0b..8c2533a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1032,6 +1032,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) ASSERT(is_empty_shadow_page(sp->spt)); hlist_del(&sp->hash_link); list_del(&sp->link); + kfree(sp->slot_bitmap); __free_page(virt_to_page(sp->spt)); if (!sp->role.direct) __free_page(virt_to_page(sp->gfns)); @@ -1048,6 +1049,7 @@ static struct kvm_mmu_page 
*kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte, int direct) { struct kvm_mmu_page *sp; + struct kvm_memslots *slots = kvm_memslots(vcpu->kvm); sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp); sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); @@ -1056,7 +1058,10 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, PAGE_SIZE); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); - bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); + sp->slot_bitmap = kzalloc(sizeof(long) * + BITS_TO_LONGS(slots->nmemslots), GFP_KERNEL); + if (!sp->slot_bitmap) + return NULL; sp->multimapped = 0; sp->parent_pte = parent_pte; kvm_mod_used_mmu_pages(vcpu->kvm, +1); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5eccdba..c002fac 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1978,7 +1978,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = KVM_MAX_VCPUS; break; case KVM_CAP_NR_MEMSLOTS: - r = KVM_MEMORY_SLOTS; + r = INT_MAX - KVM_PRIVATE_MEM_SLOTS; break; case KVM_CAP_PV_MMU: /* obsolete */ r = 0; @@ -3201,7 +3201,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, mutex_lock(&kvm->slots_lock); r = -EINVAL; - if (log->slot >= KVM_MEMORY_SLOTS) + if (log->slot >= kvm->memslots->nmemslots) goto out; memslot = &kvm->memslots->memslots[log->slot]; @@ -6068,7 +6068,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; /* Prevent internal slot pages from being moved by fork()/COW. 
*/ - if (memslot->id >= KVM_MEMORY_SLOTS) + if (memslot->id < KVM_PRIVATE_MEM_SLOTS) map_flags = MAP_SHARED | MAP_ANONYMOUS; /*To keep backward compatibility with older userspace, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b5021db..4cb9f94 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -27,6 +27,10 @@ #include <asm/kvm_host.h> +#ifndef KVM_PRIVATE_MEM_SLOTS + #define KVM_PRIVATE_MEM_SLOTS 0 +#endif + /* * vcpu->requests bit members */ @@ -206,8 +210,7 @@ struct kvm_irq_routing_table {}; struct kvm_memslots { int nmemslots; u64 generation; - struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + - KVM_PRIVATE_MEM_SLOTS]; + struct kvm_memory_slot memslots[]; }; struct kvm { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index fd67bcd..32f023c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -623,13 +623,14 @@ int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, int user_alloc) { - int r; + int r, nmemslots; gfn_t base_gfn; unsigned long npages; unsigned long i; - struct kvm_memory_slot *memslot; - struct kvm_memory_slot old, new; + struct kvm_memory_slot *memslot = NULL; + struct kvm_memory_slot old = {}, new = {}; struct kvm_memslots *slots, *old_memslots; + bool flush = false; r = -EINVAL; /* General sanity checks */ @@ -639,12 +640,9 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out; if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1))) goto out; - if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) - goto out; if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) goto out; - memslot = &kvm->memslots->memslots[mem->slot]; base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; npages = mem->memory_size >> PAGE_SHIFT; @@ -655,7 +653,10 @@ int __kvm_set_memory_region(struct kvm *kvm, if (!npages) mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; - new = old = *memslot; + if (mem->slot < kvm->memslots->nmemslots) { + memslot = 
&kvm->memslots->memslots[mem->slot]; + new = old = *memslot; + } new.id = mem->slot; new.base_gfn = base_gfn; @@ -669,7 +670,7 @@ int __kvm_set_memory_region(struct kvm *kvm, /* Check for overlaps */ r = -EEXIST; - for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { + for (i = KVM_PRIVATE_MEM_SLOTS; i < kvm->memslots->nmemslots; ++i) { struct kvm_memory_slot *s = &kvm->memslots->memslots[i]; if (s == memslot || !s->npages) @@ -752,12 +753,19 @@ skip_lpage: if (!npages) { r = -ENOMEM; - slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + + nmemslots = (mem->slot >= kvm->memslots->nmemslots) ? + mem->slot + 1 : kvm->memslots->nmemslots; + + slots = kzalloc(sizeof(struct kvm_memslots) + + nmemslots * sizeof(struct kvm_memory_slot), + GFP_KERNEL); if (!slots) goto out_free; - memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); - if (mem->slot >= slots->nmemslots) - slots->nmemslots = mem->slot + 1; + memcpy(slots, kvm->memslots, + sizeof(struct kvm_memslots) + kvm->memslots->nmemslots * + sizeof(struct kvm_memory_slot)); + slots->nmemslots = nmemslots; slots->generation++; slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; @@ -787,12 +795,21 @@ skip_lpage: } r = -ENOMEM; - slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + + if (mem->slot >= kvm->memslots->nmemslots) { + nmemslots = mem->slot + 1; + flush = true; + } else + nmemslots = kvm->memslots->nmemslots; + + slots = kzalloc(sizeof(struct kvm_memslots) + + nmemslots * sizeof(struct kvm_memory_slot), + GFP_KERNEL); if (!slots) goto out_free; - memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); - if (mem->slot >= slots->nmemslots) - slots->nmemslots = mem->slot + 1; + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots) + + kvm->memslots->nmemslots * sizeof(struct kvm_memory_slot)); + slots->nmemslots = nmemslots; slots->generation++; /* actual memory is freed via old in kvm_free_physmem_slot below */ @@ -808,6 +825,9 @@ skip_lpage: rcu_assign_pointer(kvm->memslots, slots); 
synchronize_srcu_expedited(&kvm->srcu); + if (flush) + kvm_arch_flush_shadow(kvm); + kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); kvm_free_physmem_slot(&old, &new); @@ -841,8 +861,6 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, kvm_userspace_memory_region *mem, int user_alloc) { - if (mem->slot >= KVM_MEMORY_SLOTS) - return -EINVAL; return kvm_set_memory_region(kvm, mem, user_alloc); } @@ -855,7 +873,7 @@ int kvm_get_dirty_log(struct kvm *kvm, unsigned long any = 0; r = -EINVAL; - if (log->slot >= KVM_MEMORY_SLOTS) + if (log->slot >= kvm->memslots->nmemslots) goto out; memslot = &kvm->memslots->memslots[log->slot]; @@ -947,7 +965,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) int i; struct kvm_memslots *slots = kvm_memslots(kvm); - for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { + for (i = KVM_PRIVATE_MEM_SLOTS; i < slots->nmemslots; ++i) { struct kvm_memory_slot *memslot = &slots->memslots[i]; if (memslot->flags & KVM_MEMSLOT_INVALID) @@ -1832,6 +1850,8 @@ static long kvm_vm_ioctl(struct file *filp, sizeof kvm_userspace_mem)) goto out; + kvm_userspace_mem.slot += KVM_PRIVATE_MEM_SLOTS; + r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1); if (r) goto out; @@ -1843,6 +1863,9 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&log, argp, sizeof log)) goto out; + + log.slot += KVM_PRIVATE_MEM_SLOTS; + r = kvm_vm_ioctl_get_dirty_log(kvm, &log); if (r) goto out; @@ -1937,7 +1960,7 @@ static long kvm_vm_compat_ioctl(struct file *filp, if (copy_from_user(&compat_log, (void __user *)arg, sizeof(compat_log))) goto out; - log.slot = compat_log.slot; + log.slot = compat_log.slot + KVM_PRIVATE_MEM_SLOTS; log.padding1 = compat_log.padding1; log.padding2 = compat_log.padding2; log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html