On Mon, Jan 24, 2011 at 10:37:56PM -0700, Alex Williamson wrote:
> On Mon, 2011-01-24 at 08:44 -0700, Alex Williamson wrote:
> > I'll look at how we might be able to allocate slots on demand.
> > Thanks,
> 
> Here's a first cut just to see if this looks agreeable.  This allows
> the slot array to grow on demand.  This works with current userspace,
> as well as userspace trivially modified to double KVMState.slots and
> hotplugging enough pci-assign devices to exceed the previous limit
> (w/ & w/o ept).  Hopefully I got the rcu bits correct.  Does this
> look like the right path?  If so, I'll work on removing the fixed
> limit from userspace next.  Thanks,
> 
> Alex
> 
> 
> kvm: Allow memory slot array to grow on demand
> 
> Remove the fixed KVM_MEMORY_SLOTS limit, allowing the slot array
> to grow on demand.  Private slots are now allocated at the
> front instead of the end.  Only x86 seems to use private slots,
> so this is now zero for all other archs.  The memslots pointer
> is already updated using rcu, so changing the size of the
> array when it's replaced is straightforward.  x86 also keeps
> a bitmap of slots used by a kvm_mmu_page, which requires a
> shadow tlb flush whenever we increase the number of slots.
> This forces the pages to be rebuilt with the new bitmap size.
> 
> Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
> ---
> 
>  arch/ia64/include/asm/kvm_host.h    |    4 --
>  arch/ia64/kvm/kvm-ia64.c            |    2 +
>  arch/powerpc/include/asm/kvm_host.h |    3 --
>  arch/s390/include/asm/kvm_host.h    |    3 --
>  arch/x86/include/asm/kvm_host.h     |    3 +-
>  arch/x86/include/asm/vmx.h          |    6 ++-
>  arch/x86/kvm/mmu.c                  |    7 +++-
>  arch/x86/kvm/x86.c                  |    6 ++-
>  include/linux/kvm_host.h            |    7 +++-
>  virt/kvm/kvm_main.c                 |   65 ++++++++++++++++++++++++-----------
>  10 files changed, 63 insertions(+), 43 deletions(-)
> 
> 
> diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
> index 2689ee5..11d0ab2 100644
> --- a/arch/ia64/include/asm/kvm_host.h
> +++ b/arch/ia64/include/asm/kvm_host.h
> @@ -23,10 +23,6 @@
>  #ifndef __ASM_KVM_HOST_H
>  #define __ASM_KVM_HOST_H
>  
> -#define KVM_MEMORY_SLOTS 32
> -/* memory slots that does not exposed to userspace */
> -#define KVM_PRIVATE_MEM_SLOTS 4
> -
>  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
>  
>  /* define exit reasons from vmm to kvm*/
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index 70d224d..f1adda2 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -1814,7 +1814,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>          mutex_lock(&kvm->slots_lock);
>  
>          r = -EINVAL;
> -        if (log->slot >= KVM_MEMORY_SLOTS)
> +        if (log->slot >= kvm->memslots->nmemslots)
>                  goto out;
>  
>          memslot = &kvm->memslots->memslots[log->slot];
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index bba3b9b..dc80057 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -29,9 +29,6 @@
>  #include <asm/kvm_asm.h>
>  
>  #define KVM_MAX_VCPUS 1
> -#define KVM_MEMORY_SLOTS 32
> -/* memory slots that does not exposed to userspace */
> -#define KVM_PRIVATE_MEM_SLOTS 4
>  
>  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
>  
> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
> index cef7dbf..92a964c 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
> @@ -20,9 +20,6 @@
>  #include <asm/cpu.h>
>  
>  #define KVM_MAX_VCPUS 64
> -#define KVM_MEMORY_SLOTS 32
> -/* memory slots that does not exposed to userspace */
> -#define KVM_PRIVATE_MEM_SLOTS 4
>  
>  struct sca_entry {
>          atomic_t scn;
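
Stepping back to the changelog above: the grow-on-demand scheme it
describes is the usual RCU copy-and-publish pattern: build a larger
copy, swap the pointer, wait for readers, then free the old array.  A
minimal sketch of the idea (grow_slots() and its exact shape are
illustrative only, not code from this patch; locking at the call site
and the arch shadow flush are omitted):

    static struct kvm_memslots *grow_slots(struct kvm *kvm, int want)
    {
            struct kvm_memslots *old = kvm->memslots;
            struct kvm_memslots *new;

            /* flexible array member: header plus 'want' slot entries */
            new = kzalloc(sizeof(*new) +
                          want * sizeof(struct kvm_memory_slot), GFP_KERNEL);
            if (!new)
                    return NULL;
            memcpy(new, old, sizeof(*old) +
                   old->nmemslots * sizeof(struct kvm_memory_slot));
            new->nmemslots = want;
            rcu_assign_pointer(kvm->memslots, new);   /* publish */
            synchronize_srcu_expedited(&kvm->srcu);   /* wait out readers */
            kfree(old);
            return new;
    }

The patch below open-codes this same sequence in
__kvm_set_memory_region(), sizing the new array from mem->slot.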
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index ffd7f8d..df1382c 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -27,7 +27,6 @@
>  #include <asm/msr-index.h>
>  
>  #define KVM_MAX_VCPUS 64
> -#define KVM_MEMORY_SLOTS 32
>  /* memory slots that does not exposed to userspace */
>  #define KVM_PRIVATE_MEM_SLOTS 4
>  
> @@ -207,7 +206,7 @@ struct kvm_mmu_page {
>           * One bit set per slot which has memory
>           * in this shadow page.
>           */
> -        DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
> +        unsigned long *slot_bitmap;
>          bool multimapped;         /* More than one parent_pte? */
>          bool unsync;
>          int root_count;           /* Currently serving as active root */
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 84471b8..7fd8c89 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -370,9 +370,9 @@ enum vmcs_field {
>  
>  #define AR_RESERVD_MASK 0xfffe0f00
>  
> -#define TSS_PRIVATE_MEMSLOT                     (KVM_MEMORY_SLOTS + 0)
> -#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT        (KVM_MEMORY_SLOTS + 1)
> -#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT      (KVM_MEMORY_SLOTS + 2)
> +#define TSS_PRIVATE_MEMSLOT                     0
> +#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT        1
> +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT      2
>  
>  #define VMX_NR_VPIDS                            (1 << 16)
>  #define VMX_VPID_EXTENT_SINGLE_CONTEXT          1
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index ccacf0b..8c2533a 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1032,6 +1032,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>          ASSERT(is_empty_shadow_page(sp->spt));
>          hlist_del(&sp->hash_link);
>          list_del(&sp->link);
> +        kfree(sp->slot_bitmap);
>          __free_page(virt_to_page(sp->spt));
>          if (!sp->role.direct)
>                  __free_page(virt_to_page(sp->gfns));
> @@ -1048,6 +1049,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
>                                                 u64 *parent_pte, int direct)
>  {
>          struct kvm_mmu_page *sp;
> +        struct kvm_memslots *slots = kvm_memslots(vcpu->kvm);
>  
>          sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp);
>          sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
> @@ -1056,7 +1058,10 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
>                                            PAGE_SIZE);
>          set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
>          list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
> -        bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
> +        sp->slot_bitmap = kzalloc(sizeof(long) *
> +                                  BITS_TO_LONGS(slots->nmemslots), GFP_KERNEL);
> +        if (!sp->slot_bitmap)
> +                return NULL;
>          sp->multimapped = 0;
>          sp->parent_pte = parent_pte;
>          kvm_mod_used_mmu_pages(vcpu->kvm, +1);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 5eccdba..c002fac 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1978,7 +1978,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>                  r = KVM_MAX_VCPUS;
>                  break;
>          case KVM_CAP_NR_MEMSLOTS:
> -                r = KVM_MEMORY_SLOTS;
> +                r = INT_MAX - KVM_PRIVATE_MEM_SLOTS;
>                  break;
>          case KVM_CAP_PV_MMU:    /* obsolete */
>                  r = 0;
> @@ -3201,7 +3201,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>          mutex_lock(&kvm->slots_lock);
>  
>          r = -EINVAL;
> -        if (log->slot >= KVM_MEMORY_SLOTS)
> +        if (log->slot >= kvm->memslots->nmemslots)
>                  goto out;
>  
>          memslot = &kvm->memslots->memslots[log->slot];
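
A note on the KVM_CAP_NR_MEMSLOTS hunk above: once the fixed limit is
gone, the capability no longer describes a real array size, only a
ceiling.  A quick (untested) userspace probe of the reported value
might look like the following; on x86 with this patch it should print
INT_MAX - 4:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
            int kvm = open("/dev/kvm", O_RDWR);

            if (kvm < 0)
                    return 1;
            /* system ioctl: returns the capability's value, not just 0/1 */
            printf("KVM_CAP_NR_MEMSLOTS: %d\n",
                   ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_NR_MEMSLOTS));
            return 0;
    }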
> @@ -6068,7 +6068,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
>          int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
>  
>          /* Prevent internal slot pages from being moved by fork()/COW. */
> -        if (memslot->id >= KVM_MEMORY_SLOTS)
> +        if (memslot->id < KVM_PRIVATE_MEM_SLOTS)
>                  map_flags = MAP_SHARED | MAP_ANONYMOUS;
>  
>          /*To keep backward compatibility with older userspace,
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index b5021db..4cb9f94 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -27,6 +27,10 @@
>  
>  #include <asm/kvm_host.h>
>  
> +#ifndef KVM_PRIVATE_MEM_SLOTS
> + #define KVM_PRIVATE_MEM_SLOTS 0
> +#endif
> +
>  /*
>   * vcpu->requests bit members
>   */
> @@ -206,8 +210,7 @@ struct kvm_irq_routing_table {};
>  struct kvm_memslots {
>          int nmemslots;
>          u64 generation;
> -        struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
> -                                        KVM_PRIVATE_MEM_SLOTS];
> +        struct kvm_memory_slot memslots[];
>  };
>  
>  struct kvm {
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index fd67bcd..32f023c 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -623,13 +623,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
>                              struct kvm_userspace_memory_region *mem,
>                              int user_alloc)
>  {
> -        int r;
> +        int r, nmemslots;
>          gfn_t base_gfn;
>          unsigned long npages;
>          unsigned long i;
> -        struct kvm_memory_slot *memslot;
> -        struct kvm_memory_slot old, new;
> +        struct kvm_memory_slot *memslot = NULL;
> +        struct kvm_memory_slot old = {}, new = {};
>          struct kvm_memslots *slots, *old_memslots;
> +        bool flush = false;
>  
>          r = -EINVAL;
>          /* General sanity checks */
> @@ -639,12 +640,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
>                  goto out;
>          if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
>                  goto out;
> -        if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
> -                goto out;
>          if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
>                  goto out;
>  
> -        memslot = &kvm->memslots->memslots[mem->slot];
>          base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
>          npages = mem->memory_size >> PAGE_SHIFT;
>  
> @@ -655,7 +653,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
>          if (!npages)
>                  mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
>  
> -        new = old = *memslot;
> +        if (mem->slot < kvm->memslots->nmemslots) {
> +                memslot = &kvm->memslots->memslots[mem->slot];
> +                new = old = *memslot;
> +        }
>                  if (s == memslot || !s->npages)
> @@ -752,12 +753,19 @@ skip_lpage:
>  
>          if (!npages) {
>                  r = -ENOMEM;
> -                slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +
> +                nmemslots = (mem->slot >= kvm->memslots->nmemslots) ?
> +                            mem->slot + 1 : kvm->memslots->nmemslots;
> +
> +                slots = kzalloc(sizeof(struct kvm_memslots) +
> +                                nmemslots * sizeof(struct kvm_memory_slot),
> +                                GFP_KERNEL);
>                  if (!slots)
>                          goto out_free;
> -                memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> -                if (mem->slot >= slots->nmemslots)
> -                        slots->nmemslots = mem->slot + 1;
> +                memcpy(slots, kvm->memslots,
> +                       sizeof(struct kvm_memslots) + kvm->memslots->nmemslots *
> +                       sizeof(struct kvm_memory_slot));
> +                slots->nmemslots = nmemslots;

Other than the upper limit issue, you should disallow growing the
number of slots when the slot in question is being deleted
(npages == 0), so none of these additions to the !npages case should
be necessary.  Also, shrinking the number of slots must be disallowed.
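
Concretely, the check suggested above could sit with the other sanity
checks at the top of __kvm_set_memory_region(); an illustrative sketch
reusing the names from the patch (not a tested hunk):

    /* A delete (npages == 0) must name an existing slot, so it can
     * never be allowed to grow, or implicitly shrink, the array. */
    if (!npages && mem->slot >= kvm->memslots->nmemslots)
            goto out;   /* r is already -EINVAL here */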
>                  slots->generation++;
>                  slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
>  
> @@ -787,12 +795,21 @@ skip_lpage:
>          }
>  
>          r = -ENOMEM;
> -        slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +
> +        if (mem->slot >= kvm->memslots->nmemslots) {
> +                nmemslots = mem->slot + 1;
> +                flush = true;
> +        } else
> +                nmemslots = kvm->memslots->nmemslots;
> +
> +        slots = kzalloc(sizeof(struct kvm_memslots) +
> +                        nmemslots * sizeof(struct kvm_memory_slot),
> +                        GFP_KERNEL);
>          if (!slots)
>                  goto out_free;
> -        memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> -        if (mem->slot >= slots->nmemslots)
> -                slots->nmemslots = mem->slot + 1;
> +        memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots) +
> +               kvm->memslots->nmemslots * sizeof(struct kvm_memory_slot));
> +        slots->nmemslots = nmemslots;
>          slots->generation++;
>  
>          /* actual memory is freed via old in kvm_free_physmem_slot below */
> @@ -808,6 +825,9 @@ skip_lpage:
>          rcu_assign_pointer(kvm->memslots, slots);

It should be:

        spin_lock(kvm->mmu_lock)
        rcu_assign_pointer()
        kvm_arch_flush_shadow()
        spin_unlock(kvm->mmu_lock)

But kvm_arch_flush_shadow() takes mmu_lock currently, so that needs
fixing.

Otherwise looks fine.
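
Spelled out as code, the locking suggestion above would be roughly the
following (untested sketch; it assumes kvm_arch_flush_shadow() is first
changed to expect mmu_lock to be held by the caller instead of taking
it internally):

    spin_lock(&kvm->mmu_lock);
    rcu_assign_pointer(kvm->memslots, slots);
    kvm_arch_flush_shadow(kvm);     /* caller holds mmu_lock now */
    spin_unlock(&kvm->mmu_lock);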