Re: [RFC PATCH 0/2] Expose available KVM free memory slot count to help avoid aborts

On Mon, Jan 24, 2011 at 10:37:56PM -0700, Alex Williamson wrote:
> On Mon, 2011-01-24 at 08:44 -0700, Alex Williamson wrote:
> > I'll look at how we might be
> > able to allocate slots on demand.  Thanks,
> 
> Here's a first cut just to see if this looks agreeable.  This allows the
> slot array to grow on demand.  This works with current userspace, as
> well as userspace trivially modified to double KVMState.slots and
> hotplugging enough pci-assign devices to exceed the previous limit (w/ &
> w/o ept).  Hopefully I got the rcu bits correct.  Does this look like
> the right path?  If so, I'll work on removing the fixed limit from
> userspace next.  Thanks,
> 
> Alex
> 
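As an aside on the userspace side of this: rather than a fixed or
hand-doubled KVMState.slots array, the slot table can be sized from
KVM_CAP_NR_MEMSLOTS, which the patch below already adjusts.  A minimal
sketch (kvm_probe_nr_memslots() is just a made-up helper name, and the
fallback of 32 is an assumption for older kernels):

    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Ask the kernel how many memory slots it supports instead of
     * hardcoding the limit; 32 is assumed as a fallback for kernels
     * that return 0 or an error for the capability. */
    static int kvm_probe_nr_memslots(void)
    {
            int fd = open("/dev/kvm", O_RDWR);
            int n = -1;

            if (fd >= 0) {
                    n = ioctl(fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_MEMSLOTS);
                    close(fd);
            }
            return n > 0 ? n : 32;
    }
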
> 
> kvm: Allow memory slot array to grow on demand
> 
> Remove fixed KVM_MEMORY_SLOTS limit, allowing the slot array
> to grow on demand.  Private slots are now allocated at the
> front instead of the end.  Only x86 seems to use private slots,
> so this is now zero for all other archs.  The memslots pointer
> is already updated using rcu, so changing the size of the
> array when it's replaced is straightforward.  x86 also keeps
> a bitmap of slots used by a kvm_mmu_page, which requires a
> shadow tlb flush whenever we increase the number of slots.
> This forces the pages to be rebuilt with the new bitmap size.
> 
> Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
> ---
> 
>  arch/ia64/include/asm/kvm_host.h    |    4 --
>  arch/ia64/kvm/kvm-ia64.c            |    2 +
>  arch/powerpc/include/asm/kvm_host.h |    3 --
>  arch/s390/include/asm/kvm_host.h    |    3 --
>  arch/x86/include/asm/kvm_host.h     |    3 +-
>  arch/x86/include/asm/vmx.h          |    6 ++-
>  arch/x86/kvm/mmu.c                  |    7 +++-
>  arch/x86/kvm/x86.c                  |    6 ++-
>  include/linux/kvm_host.h            |    7 +++-
>  virt/kvm/kvm_main.c                 |   65 ++++++++++++++++++++++++-----------
>  10 files changed, 63 insertions(+), 43 deletions(-)
> 
> 
> diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
> index 2689ee5..11d0ab2 100644
> --- a/arch/ia64/include/asm/kvm_host.h
> +++ b/arch/ia64/include/asm/kvm_host.h
> @@ -23,10 +23,6 @@
>  #ifndef __ASM_KVM_HOST_H
>  #define __ASM_KVM_HOST_H
>  
> -#define KVM_MEMORY_SLOTS 32
> -/* memory slots that does not exposed to userspace */
> -#define KVM_PRIVATE_MEM_SLOTS 4
> -
>  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
>  
>  /* define exit reasons from vmm to kvm*/
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index 70d224d..f1adda2 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -1814,7 +1814,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>  	mutex_lock(&kvm->slots_lock);
>  
>  	r = -EINVAL;
> -	if (log->slot >= KVM_MEMORY_SLOTS)
> +	if (log->slot >= kvm->memslots->nmemslots)
>  		goto out;
>  
>  	memslot = &kvm->memslots->memslots[log->slot];
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index bba3b9b..dc80057 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -29,9 +29,6 @@
>  #include <asm/kvm_asm.h>
>  
>  #define KVM_MAX_VCPUS 1
> -#define KVM_MEMORY_SLOTS 32
> -/* memory slots that does not exposed to userspace */
> -#define KVM_PRIVATE_MEM_SLOTS 4
>  
>  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
>  
> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
> index cef7dbf..92a964c 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
> @@ -20,9 +20,6 @@
>  #include <asm/cpu.h>
>  
>  #define KVM_MAX_VCPUS 64
> -#define KVM_MEMORY_SLOTS 32
> -/* memory slots that does not exposed to userspace */
> -#define KVM_PRIVATE_MEM_SLOTS 4
>  
>  struct sca_entry {
>  	atomic_t scn;
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index ffd7f8d..df1382c 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -27,7 +27,6 @@
>  #include <asm/msr-index.h>
>  
>  #define KVM_MAX_VCPUS 64
> -#define KVM_MEMORY_SLOTS 32
>  /* memory slots that does not exposed to userspace */
>  #define KVM_PRIVATE_MEM_SLOTS 4
>  
> @@ -207,7 +206,7 @@ struct kvm_mmu_page {
>  	 * One bit set per slot which has memory
>  	 * in this shadow page.
>  	 */
> -	DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
> +	unsigned long *slot_bitmap;
>  	bool multimapped;         /* More than one parent_pte? */
>  	bool unsync;
>  	int root_count;          /* Currently serving as active root */
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 84471b8..7fd8c89 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -370,9 +370,9 @@ enum vmcs_field {
>  
>  #define AR_RESERVD_MASK 0xfffe0f00
>  
> -#define TSS_PRIVATE_MEMSLOT			(KVM_MEMORY_SLOTS + 0)
> -#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 1)
> -#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 2)
> +#define TSS_PRIVATE_MEMSLOT			0
> +#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	1
> +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT	2
>  
>  #define VMX_NR_VPIDS				(1 << 16)
>  #define VMX_VPID_EXTENT_SINGLE_CONTEXT		1
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index ccacf0b..8c2533a 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1032,6 +1032,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>  	ASSERT(is_empty_shadow_page(sp->spt));
>  	hlist_del(&sp->hash_link);
>  	list_del(&sp->link);
> +	kfree(sp->slot_bitmap);
>  	__free_page(virt_to_page(sp->spt));
>  	if (!sp->role.direct)
>  		__free_page(virt_to_page(sp->gfns));
> @@ -1048,6 +1049,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
>  					       u64 *parent_pte, int direct)
>  {
>  	struct kvm_mmu_page *sp;
> +	struct kvm_memslots *slots = kvm_memslots(vcpu->kvm);
>  
>  	sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp);
>  	sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
> @@ -1056,7 +1058,10 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
>  						  PAGE_SIZE);
>  	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
>  	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
> -	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
> +	sp->slot_bitmap = kzalloc(sizeof(long) *
> +				  BITS_TO_LONGS(slots->nmemslots), GFP_KERNEL);
> +	if (!sp->slot_bitmap)
> +		return NULL;
>  	sp->multimapped = 0;
>  	sp->parent_pte = parent_pte;
>  	kvm_mod_used_mmu_pages(vcpu->kvm, +1);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 5eccdba..c002fac 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1978,7 +1978,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>  		r = KVM_MAX_VCPUS;
>  		break;
>  	case KVM_CAP_NR_MEMSLOTS:
> -		r = KVM_MEMORY_SLOTS;
> +		r = INT_MAX - KVM_PRIVATE_MEM_SLOTS;
>  		break;
>  	case KVM_CAP_PV_MMU:	/* obsolete */
>  		r = 0;
> @@ -3201,7 +3201,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>  	mutex_lock(&kvm->slots_lock);
>  
>  	r = -EINVAL;
> -	if (log->slot >= KVM_MEMORY_SLOTS)
> +	if (log->slot >= kvm->memslots->nmemslots)
>  		goto out;
>  
>  	memslot = &kvm->memslots->memslots[log->slot];
> @@ -6068,7 +6068,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
>  	int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
>  
>  	/* Prevent internal slot pages from being moved by fork()/COW. */
> -	if (memslot->id >= KVM_MEMORY_SLOTS)
> +	if (memslot->id < KVM_PRIVATE_MEM_SLOTS)
>  		map_flags = MAP_SHARED | MAP_ANONYMOUS;
>  
>  	/*To keep backward compatibility with older userspace,
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index b5021db..4cb9f94 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -27,6 +27,10 @@
>  
>  #include <asm/kvm_host.h>
>  
> +#ifndef KVM_PRIVATE_MEM_SLOTS
> + #define KVM_PRIVATE_MEM_SLOTS 0
> +#endif
> +
>  /*
>   * vcpu->requests bit members
>   */
> @@ -206,8 +210,7 @@ struct kvm_irq_routing_table {};
>  struct kvm_memslots {
>  	int nmemslots;
>  	u64 generation;
> -	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
> -					KVM_PRIVATE_MEM_SLOTS];
> +	struct kvm_memory_slot memslots[];
>  };
>  
>  struct kvm {
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index fd67bcd..32f023c 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -623,13 +623,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
>  			    struct kvm_userspace_memory_region *mem,
>  			    int user_alloc)
>  {
> -	int r;
> +	int r, nmemslots;
>  	gfn_t base_gfn;
>  	unsigned long npages;
>  	unsigned long i;
> -	struct kvm_memory_slot *memslot;
> -	struct kvm_memory_slot old, new;
> +	struct kvm_memory_slot *memslot = NULL;
> +	struct kvm_memory_slot old = {}, new = {};
>  	struct kvm_memslots *slots, *old_memslots;
> +	bool flush = false;
>  
>  	r = -EINVAL;
>  	/* General sanity checks */
> @@ -639,12 +640,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
>  		goto out;
>  	if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
>  		goto out;
> -	if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
> -		goto out;
>  	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
>  		goto out;
>  
> -	memslot = &kvm->memslots->memslots[mem->slot];
>  	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
>  	npages = mem->memory_size >> PAGE_SHIFT;
>  
> @@ -655,7 +653,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
>  	if (!npages)
>  		mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
>  
> -	new = old = *memslot;
> +	if (mem->slot < kvm->memslots->nmemslots) {
> +		memslot = &kvm->memslots->memslots[mem->slot];
> +		new = old = *memslot;
> +	}

>  		if (s == memslot || !s->npages)
> @@ -752,12 +753,19 @@ skip_lpage:
>  
>  	if (!npages) {
>  		r = -ENOMEM;
> -		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +
> +		nmemslots = (mem->slot >= kvm->memslots->nmemslots) ?
> +			    mem->slot + 1 : kvm->memslots->nmemslots;
> +
> +		slots = kzalloc(sizeof(struct kvm_memslots) +
> +				nmemslots * sizeof(struct kvm_memory_slot),
> +				GFP_KERNEL);
>  		if (!slots)
>  			goto out_free;
> -		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> -		if (mem->slot >= slots->nmemslots)
> -			slots->nmemslots = mem->slot + 1;
> +		memcpy(slots, kvm->memslots,
> +		       sizeof(struct kvm_memslots) + kvm->memslots->nmemslots *
> +		       sizeof(struct kvm_memory_slot));
> +		slots->nmemslots = nmemslots;

Other than enforcing an upper limit, you should disallow increasing the
number of slots when the new slot is being deleted (npages == 0), so none
of these additions to the !npages case should be necessary.

Also, the number of slots must not be allowed to shrink.
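
Something along these lines would cover both points (a sketch only,
reusing the names from the patch, not tested):

    /* A delete must never grow the slot array... */
    if (!npages && mem->slot >= kvm->memslots->nmemslots)
            goto out;

    /* ...and the array must never shrink. */
    if (mem->slot >= kvm->memslots->nmemslots)
            nmemslots = mem->slot + 1;
    else
            nmemslots = kvm->memslots->nmemslots;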

>  		slots->generation++;
>  		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
>  
> @@ -787,12 +795,21 @@ skip_lpage:
>  	}
>  
>  	r = -ENOMEM;
> -	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +
> +	if (mem->slot >= kvm->memslots->nmemslots) {
> +		nmemslots = mem->slot + 1;
> +		flush = true;
> +	} else
> +		nmemslots = kvm->memslots->nmemslots;
> +
> +	slots = kzalloc(sizeof(struct kvm_memslots) +
> +			nmemslots * sizeof(struct kvm_memory_slot),
> +			GFP_KERNEL);
>  	if (!slots)
>  		goto out_free;
> -	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> -	if (mem->slot >= slots->nmemslots)
> -		slots->nmemslots = mem->slot + 1;
> +	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots) +
> +	       kvm->memslots->nmemslots * sizeof(struct kvm_memory_slot));
> +	slots->nmemslots = nmemslots;
>  	slots->generation++;
>  
>  	/* actual memory is freed via old in kvm_free_physmem_slot below */
> @@ -808,6 +825,9 @@ skip_lpage:
>  	rcu_assign_pointer(kvm->memslots, slots);

It should be: 

    spin_lock(kvm->mmu_lock)
    rcu_assign_pointer()
    kvm_arch_flush_shadow()
    spin_unlock(kvm->mmu_lock)

But kvm_arch_flush_shadow() takes mmu_lock currently, so that needs
fixing. 
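
Roughly, assuming kvm_arch_flush_shadow() gets split so that the zapping
can run with mmu_lock already held (kvm_arch_flush_shadow_locked() below
is only a hypothetical name for such a variant):

    spin_lock(&kvm->mmu_lock);
    rcu_assign_pointer(kvm->memslots, slots);
    kvm_arch_flush_shadow_locked(kvm);   /* expects mmu_lock to be held */
    spin_unlock(&kvm->mmu_lock);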

Otherwise looks fine.
