[PATCH] KVM: Allow userspace to specify memory to be used for private regions.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The motivation for this patch is to fix a 20KB leak of memory in vmx.c 
when a VM is created and destroyed.

On x86/vmx platforms KVM needs 5 pages of userspace memory per VM for
architecture specific reasons.  It currently allocates the pages on behalf
of user space, but has no way of cleanly freeing that memory while the
user space process is still running.  For user space processes that want
more control over that memory, this patch allows user space to provide the
memory that KVM uses.

Signed-off-by: Andrew Honig <ahonig@xxxxxxxxxx>
---
 Documentation/virtual/kvm/api.txt |    8 +++++++
 arch/arm/kvm/arm.c                |    6 +++++
 arch/ia64/kvm/kvm-ia64.c          |    6 +++++
 arch/powerpc/kvm/powerpc.c        |    6 +++++
 arch/s390/kvm/kvm-s390.c          |    6 +++++
 arch/x86/include/asm/kvm_host.h   |    7 ++++++
 arch/x86/kvm/svm.c                |    8 +++++++
 arch/x86/kvm/vmx.c                |   47 ++++++++++++++++++++++++++++++++++---
 arch/x86/kvm/x86.c                |   12 ++++++++--
 include/linux/kvm_host.h          |    2 ++
 virt/kvm/kvm_main.c               |    2 ++
 11 files changed, 105 insertions(+), 5 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 119358d..aa18cac 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -879,6 +879,14 @@ It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
 be identical.  This allows large pages in the guest to be backed by large
 pages in the host.
 
+On x86/vmx architectures KVM needs 5 pages of user space memory for architecture
+specific reasons.  Calling this ioctl with the special memslot
+KVM_PRIVATE_MEMORY_MEMSLOT will tell kvm which user space memory to use for
+that memory.  If that memslot is not set before creating VCPUs for a VM then
+kvm will allocate the memory on behalf of user space, but userspace will not
+be able to free that memory.  User space should treat this memory as opaque
+and not modify it.
+
 The flags field supports two flags: KVM_MEM_LOG_DIRTY_PAGES and
 KVM_MEM_READONLY.  The former can be set to instruct KVM to keep track of
 writes to memory within the slot.  See KVM_GET_DIRTY_LOG ioctl to know how to
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 5a93698..ac52f14 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -228,6 +228,12 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	return 0;
 }
 
+int kvm_arch_set_private_memory(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem)
+{
+	return 0;
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
 				   struct kvm_memory_slot old,
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index ad3126a..570dd97 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1576,6 +1576,12 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return 0;
 }
 
+int kvm_arch_set_private_memory(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem)
+{
+	return 0;
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		struct kvm_memory_slot *memslot,
 		struct kvm_memory_slot old,
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 934413c..6e3843b 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -410,6 +410,12 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return kvmppc_core_create_memslot(slot, npages);
 }
 
+int kvm_arch_set_private_memory(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem)
+{
+	return 0;
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                    struct kvm_memory_slot *memslot,
                                    struct kvm_memory_slot old,
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 4cf35a0..a97f495 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -971,6 +971,12 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return 0;
 }
 
+int kvm_arch_set_private_memory(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem)
+{
+	return 0;
+}
+
 /* Section: memory related */
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4979778..7215817 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -37,6 +37,7 @@
 /* memory slots that are not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 3
 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
+#define KVM_PRIVATE_MEMORY_MEMSLOT 0x80000001
 
 #define KVM_MMIO_SIZE 16
 
@@ -553,6 +554,9 @@ struct kvm_arch {
 	struct page *ept_identity_pagetable;
 	bool ept_identity_pagetable_done;
 	gpa_t ept_identity_map_addr;
+	unsigned long ept_ptr;
+	unsigned long apic_ptr;
+	unsigned long tss_ptr;
 
 	unsigned long irq_sources_bitmap;
 	s64 kvmclock_offset;
@@ -640,6 +644,9 @@ struct kvm_x86_ops {
 	bool (*cpu_has_accelerated_tpr)(void);
 	void (*cpuid_update)(struct kvm_vcpu *vcpu);
 
+	int (*set_private_memory)(struct kvm *kvm,
+				  struct kvm_userspace_memory_region *mem);
+
 	/* Create, but do not attach this VCPU */
 	struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
 	void (*vcpu_free)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e1b1ce2..3cc4e56 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1211,6 +1211,12 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static int svm_set_private_memory(struct kvm *kvm,
+				  struct kvm_userspace_memory_region *mem)
+{
+	return 0;
+}
+
 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 {
 	struct vcpu_svm *svm;
@@ -4257,6 +4263,8 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.hardware_disable = svm_hardware_disable,
 	.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
 
+	.set_private_memory = svm_set_private_memory,
+
 	.vcpu_create = svm_create_vcpu,
 	.vcpu_free = svm_free_vcpu,
 	.vcpu_reset = svm_vcpu_reset,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6667042..796ac07 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3692,7 +3692,13 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	kvm_userspace_mem.flags = 0;
 	kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
 	kvm_userspace_mem.memory_size = PAGE_SIZE;
-	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
+	if (kvm->arch.apic_ptr) {
+		kvm_userspace_mem.userspace_addr = kvm->arch.apic_ptr;
+		r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, true);
+	} else {
+		r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
+	}
+
 	if (r)
 		goto out;
 
@@ -3722,7 +3728,13 @@ static int alloc_identity_pagetable(struct kvm *kvm)
 	kvm_userspace_mem.guest_phys_addr =
 		kvm->arch.ept_identity_map_addr;
 	kvm_userspace_mem.memory_size = PAGE_SIZE;
-	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
+	if (kvm->arch.ept_ptr) {
+		kvm_userspace_mem.userspace_addr = kvm->arch.ept_ptr;
+		r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, true);
+	} else {
+		r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
+	}
+
 	if (r)
 		goto out;
 
@@ -4362,7 +4374,13 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 		.flags = 0,
 	};
 
-	ret = kvm_set_memory_region(kvm, &tss_mem, false);
+	if (kvm->arch.tss_ptr) {
+		tss_mem.userspace_addr = kvm->arch.tss_ptr;
+		ret = kvm_set_memory_region(kvm, &tss_mem, true);
+	} else {
+		ret = kvm_set_memory_region(kvm, &tss_mem, false);
+	}
+
 	if (ret)
 		return ret;
 	kvm->arch.tss_addr = addr;
@@ -6683,6 +6701,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx_complete_interrupts(vmx);
 }
 
+static int vmx_set_private_memory(struct kvm *kvm,
+				  struct kvm_userspace_memory_region *mem)
+{
+	/*
+	 * Early sanity checking so userspace gets an error message during
+	 * memory setup and not when trying to use this memory.
+	 * Checks to see if the memory is valid are performed later when
+	 * the memory is used.
+	 */
+	if (!mem->userspace_addr || mem->userspace_addr & (PAGE_SIZE - 1) ||
+			mem->memory_size & (PAGE_SIZE - 1) ||
+			mem->memory_size < PAGE_SIZE * 5)
+		return -EINVAL;
+
+	kvm->arch.ept_ptr = mem->userspace_addr;
+	kvm->arch.apic_ptr = mem->userspace_addr + PAGE_SIZE;
+	kvm->arch.tss_ptr = mem->userspace_addr + PAGE_SIZE * 2;
+
+	return 0;
+}
+
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7532,6 +7571,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.hardware_disable = hardware_disable,
 	.cpu_has_accelerated_tpr = report_flexpriority,
 
+	.set_private_memory = vmx_set_private_memory,
+
 	.vcpu_create = vmx_create_vcpu,
 	.vcpu_free = vmx_free_vcpu,
 	.vcpu_reset = vmx_vcpu_reset,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e172132..7045d0a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6809,6 +6809,12 @@ void kvm_arch_sync_events(struct kvm *kvm)
 	kvm_free_pit(kvm);
 }
 
+int kvm_arch_set_private_memory(struct kvm *kvm,
+			    struct kvm_userspace_memory_region *mem)
+{
+	return kvm_x86_ops->set_private_memory(kvm, mem);
+}
+
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	kvm_iommu_unmap_guest(kvm);
@@ -6913,7 +6919,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	 * Only private memory slots need to be mapped here since
 	 * KVM_SET_MEMORY_REGION ioctl is no longer supported.
 	 */
-	if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) {
+	if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages &&
+						!user_alloc) {
 		unsigned long userspace_addr;
 
 		/*
@@ -6941,7 +6948,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 
 	int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
 
-	if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) {
+	if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages &&
+						!user_alloc) {
 		int ret;
 
 		ret = vm_munmap(old.userspace_addr,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c139582..f441d1f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -461,6 +461,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont);
 int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
+int kvm_arch_set_private_memory(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
 				struct kvm_memory_slot old,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f18013f..5372225 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -949,6 +949,8 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 				   kvm_userspace_memory_region *mem,
 				   bool user_alloc)
 {
+	if (mem->slot == KVM_PRIVATE_MEMORY_MEMSLOT)
+		return kvm_arch_set_private_memory(kvm, mem);
 	if (mem->slot >= KVM_USER_MEM_SLOTS)
 		return -EINVAL;
 	return kvm_set_memory_region(kvm, mem, user_alloc);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux