On Mon, Apr 15, 2013 at 03:10:32PM -0700, Andrew Honig wrote: > > The motivation for this patch is to fix a 20KB leak of memory in vmx.c > when a VM is created and destroyed. > > On x86/vmx platforms KVM needs 5 pages of userspace memory per VM for > architecture specific reasons. It currently allocates the pages on behalf > of user space, but has no way of cleanly freeing that memory while the > user space process is still running. For user space processes that want > more control over that memory, this patch allows user space to provide the > memory that KVM uses. > > Signed-off-by: Andrew Honig <ahonig@xxxxxxxxxx> I thought we agreed on not adding new API and using __kvm_set_memory_region() to unregister private memory regions. > --- > Documentation/virtual/kvm/api.txt | 8 +++++++ > arch/arm/kvm/arm.c | 6 +++++ > arch/ia64/kvm/kvm-ia64.c | 6 +++++ > arch/powerpc/kvm/powerpc.c | 6 +++++ > arch/s390/kvm/kvm-s390.c | 6 +++++ > arch/x86/include/asm/kvm_host.h | 7 ++++++ > arch/x86/kvm/svm.c | 8 +++++++ > arch/x86/kvm/vmx.c | 47 ++++++++++++++++++++++++++++++++++--- > arch/x86/kvm/x86.c | 12 ++++++++-- > include/linux/kvm_host.h | 2 ++ > virt/kvm/kvm_main.c | 2 ++ > 11 files changed, 105 insertions(+), 5 deletions(-) > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > index 119358d..aa18cac 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -879,6 +879,14 @@ It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr > be identical. This allows large pages in the guest to be backed by large > pages in the host. > > +On x86/vmx architectures KVM needs 5 pages of user space memory for architecture > +specific reasons. Calling this ioctl with the special memslot > +KVM_PRIVATE_MEMORY_MEMSLOT will tell kvm which user space memory to use for > +that memory. If that memslot is not set before creating VCPUs for a VM then > +kvm will allocate the memory on behalf of user space, but userspace will not > +be able to free that memory. User space should treat this memory as opaque > +and not modify it. > + > The flags field supports two flags: KVM_MEM_LOG_DIRTY_PAGES and > KVM_MEM_READONLY. The former can be set to instruct KVM to keep track of > writes to memory within the slot. See KVM_GET_DIRTY_LOG ioctl to know how to > diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c > index 5a93698..ac52f14 100644 > --- a/arch/arm/kvm/arm.c > +++ b/arch/arm/kvm/arm.c > @@ -228,6 +228,12 @@ int kvm_arch_set_memory_region(struct kvm *kvm, > return 0; > } > > +int kvm_arch_set_private_memory(struct kvm *kvm, > + struct kvm_userspace_memory_region *mem) > +{ > + return 0; > +} > + > int kvm_arch_prepare_memory_region(struct kvm *kvm, > struct kvm_memory_slot *memslot, > struct kvm_memory_slot old, > diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c > index ad3126a..570dd97 100644 > --- a/arch/ia64/kvm/kvm-ia64.c > +++ b/arch/ia64/kvm/kvm-ia64.c > @@ -1576,6 +1576,12 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) > return 0; > } > > +int kvm_arch_set_private_memory(struct kvm *kvm, > + struct kvm_userspace_memory_region *mem) > +{ > + return 0; > +} > + > int kvm_arch_prepare_memory_region(struct kvm *kvm, > struct kvm_memory_slot *memslot, > struct kvm_memory_slot old, > diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c > index 934413c..6e3843b 100644 > --- a/arch/powerpc/kvm/powerpc.c > +++ b/arch/powerpc/kvm/powerpc.c > @@ -410,6 +410,12 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) > return kvmppc_core_create_memslot(slot, npages); > } > > +int kvm_arch_set_private_memory(struct kvm *kvm, > + struct kvm_userspace_memory_region *mem) > +{ > + return 0; > +} > + > int kvm_arch_prepare_memory_region(struct kvm *kvm, > struct kvm_memory_slot *memslot, > struct kvm_memory_slot old, > diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c > index 4cf35a0..a97f495 100644 > --- a/arch/s390/kvm/kvm-s390.c > +++ b/arch/s390/kvm/kvm-s390.c > @@ -971,6 +971,12 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) > return 0; > } > > +int kvm_arch_set_private_memory(struct kvm *kvm, > + struct kvm_userspace_memory_region *mem) > +{ > + return 0; > +} > + > /* Section: memory related */ > int kvm_arch_prepare_memory_region(struct kvm *kvm, > struct kvm_memory_slot *memslot, > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 4979778..7215817 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -37,6 +37,7 @@ > /* memory slots that are not exposed to userspace */ > #define KVM_PRIVATE_MEM_SLOTS 3 > #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) > +#define KVM_PRIVATE_MEMORY_MEMSLOT 0x80000001 > > #define KVM_MMIO_SIZE 16 > > @@ -553,6 +554,9 @@ struct kvm_arch { > struct page *ept_identity_pagetable; > bool ept_identity_pagetable_done; > gpa_t ept_identity_map_addr; > + unsigned long ept_ptr; > + unsigned long apic_ptr; > + unsigned long tss_ptr; > > unsigned long irq_sources_bitmap; > s64 kvmclock_offset; > @@ -640,6 +644,9 @@ struct kvm_x86_ops { > bool (*cpu_has_accelerated_tpr)(void); > void (*cpuid_update)(struct kvm_vcpu *vcpu); > > + int (*set_private_memory)(struct kvm *kvm, > + struct kvm_userspace_memory_region *mem); > + > /* Create, but do not attach this VCPU */ > struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); > void (*vcpu_free)(struct kvm_vcpu *vcpu); > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index e1b1ce2..3cc4e56 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -1211,6 +1211,12 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu) > return 0; > } > > +static int svm_set_private_memory(struct kvm *kvm, > + struct kvm_userspace_memory_region *mem) > +{ > + return 0; > +} > + > static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) > { > struct vcpu_svm *svm; > @@ -4257,6 +4263,8 @@ static struct kvm_x86_ops svm_x86_ops = { > .hardware_disable = svm_hardware_disable, > .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, > > + .set_private_memory = svm_set_private_memory, > + > .vcpu_create = svm_create_vcpu, > .vcpu_free = svm_free_vcpu, > .vcpu_reset = svm_vcpu_reset, > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 6667042..796ac07 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -3692,7 +3692,13 @@ static int alloc_apic_access_page(struct kvm *kvm) > kvm_userspace_mem.flags = 0; > kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; > kvm_userspace_mem.memory_size = PAGE_SIZE; > - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); > + if (kvm->arch.apic_ptr) { > + kvm_userspace_mem.userspace_addr = kvm->arch.apic_ptr; > + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, true); > + } else { > + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); > + } > + > if (r) > goto out; > > @@ -3722,7 +3728,13 @@ static int alloc_identity_pagetable(struct kvm *kvm) > kvm_userspace_mem.guest_phys_addr = > kvm->arch.ept_identity_map_addr; > kvm_userspace_mem.memory_size = PAGE_SIZE; > - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); > + if (kvm->arch.ept_ptr) { > + kvm_userspace_mem.userspace_addr = kvm->arch.ept_ptr; > + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, true); > + } else { > + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); > + } > + > if (r) > goto out; > > @@ -4362,7 +4374,13 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) > .flags = 0, > }; > > - ret = kvm_set_memory_region(kvm, &tss_mem, false); > + if (kvm->arch.tss_ptr) { > + tss_mem.userspace_addr = kvm->arch.tss_ptr; > + ret = kvm_set_memory_region(kvm, &tss_mem, true); > + } else { > + ret = kvm_set_memory_region(kvm, &tss_mem, false); > + } > + > if (ret) > return ret; > kvm->arch.tss_addr = addr; > @@ -6683,6 +6701,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) > vmx_complete_interrupts(vmx); > } > > +static int vmx_set_private_memory(struct kvm *kvm, > + struct kvm_userspace_memory_region *mem) > +{ > + /* > + * Early sanity checking so userspace gets an error message during > + * memory setup and not when trying to use this memory. > + * Checks to see if the memory is valid are performed later when > + * the memory is used. > + */ > + if (!mem->userspace_addr || mem->userspace_addr & (PAGE_SIZE - 1) || > + mem->memory_size & (PAGE_SIZE - 1) || > + mem->memory_size < PAGE_SIZE * 5) > + return -EINVAL; > + > + kvm->arch.ept_ptr = mem->userspace_addr; > + kvm->arch.apic_ptr = mem->userspace_addr + PAGE_SIZE; > + kvm->arch.tss_ptr = mem->userspace_addr + PAGE_SIZE * 2; > + > + return 0; > +} > + > static void vmx_free_vcpu(struct kvm_vcpu *vcpu) > { > struct vcpu_vmx *vmx = to_vmx(vcpu); > @@ -7532,6 +7571,8 @@ static struct kvm_x86_ops vmx_x86_ops = { > .hardware_disable = hardware_disable, > .cpu_has_accelerated_tpr = report_flexpriority, > > + .set_private_memory = vmx_set_private_memory, > + > .vcpu_create = vmx_create_vcpu, > .vcpu_free = vmx_free_vcpu, > .vcpu_reset = vmx_vcpu_reset, > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index e172132..7045d0a 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -6809,6 +6809,12 @@ void kvm_arch_sync_events(struct kvm *kvm) > kvm_free_pit(kvm); > } > > +int kvm_arch_set_private_memory(struct kvm *kvm, > + struct kvm_userspace_memory_region *mem) > +{ > + return kvm_x86_ops->set_private_memory(kvm, mem); > +} > + > void kvm_arch_destroy_vm(struct kvm *kvm) > { > kvm_iommu_unmap_guest(kvm); > @@ -6913,7 +6919,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, > * Only private memory slots need to be mapped here since > * KVM_SET_MEMORY_REGION ioctl is no longer supported. > */ > - if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) { > + if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages && > + !user_alloc) { > unsigned long userspace_addr; > > /* > @@ -6941,7 +6948,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, > > int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; > > - if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) { > + if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages && > + !user_alloc) { > int ret; > > ret = vm_munmap(old.userspace_addr, > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index c139582..f441d1f 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -461,6 +461,8 @@ int __kvm_set_memory_region(struct kvm *kvm, > void kvm_arch_free_memslot(struct kvm_memory_slot *free, > struct kvm_memory_slot *dont); > int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); > +int kvm_arch_set_private_memory(struct kvm *kvm, > + struct kvm_userspace_memory_region *mem); > int kvm_arch_prepare_memory_region(struct kvm *kvm, > struct kvm_memory_slot *memslot, > struct kvm_memory_slot old, > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index f18013f..5372225 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -949,6 +949,8 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, > kvm_userspace_memory_region *mem, > bool user_alloc) > { > + if (mem->slot == KVM_PRIVATE_MEMORY_MEMSLOT) > + return kvm_arch_set_private_memory(kvm, mem); > if (mem->slot >= KVM_USER_MEM_SLOTS) > return -EINVAL; > return kvm_set_memory_region(kvm, mem, user_alloc); > -- > 1.7.10.4 > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html