KVM's paging data structures (especially the rmaps) can be made as large as possible by userspace simply by creating large-enough memslots. Since commit 7661809d493b ("mm: don't allow oversized kvmalloc() calls") these huge allocations cause a warning, assuming that they could be the result of an integer overflow or underflow. However, there are configurations in the wild that create multi-TiB memslots, and in fact it is more likely than not that these allocations end up not using kmalloc-ed memory. For example, the dirty bitmap for a 64 GiB memslot would cause a 4 MiB allocation, since each 32 KiB of guest address space corresponds to 2 bytes in the dirty bitmap. Therefore, just use vmalloc directly. Introduce a new helper vcalloc to check for overflow for extra paranoia, even though it should not be a problem here even on 32-bit systems. Reported-by: syzbot+e0de2333cbf95ea473e8@xxxxxxxxxxxxxxxxxxxxxxxxx Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> --- arch/x86/kvm/mmu/page_track.c | 3 +-- arch/x86/kvm/x86.c | 4 ++-- include/linux/vmalloc.h | 10 ++++++++++ virt/kvm/kvm_main.c | 4 ++-- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c index 21427e84a82e..0d9842472288 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c @@ -36,8 +36,7 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { slot->arch.gfn_track[i] = - kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]), - GFP_KERNEL_ACCOUNT); + vcalloc(npages, sizeof(*slot->arch.gfn_track[i])); if (!slot->arch.gfn_track[i]) goto track_free; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index aabd3a2ec1bc..07f5760ea30c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11394,7 +11394,7 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot, WARN_ON(slot->arch.rmap[i]); - slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT); + 
slot->arch.rmap[i] = vcalloc(lpages, sz); if (!slot->arch.rmap[i]) { memslot_rmap_free(slot); return -ENOMEM; @@ -11475,7 +11475,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm, lpages = __kvm_mmu_slot_lpages(slot, npages, level); - linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT); + linfo = vcalloc(lpages, sizeof(*linfo)); if (!linfo) goto out_free; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 671d402c3778..6d51c83c2b0e 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -167,6 +167,16 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); +static inline void *vcalloc(size_t n, size_t size) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return NULL; + + return vzalloc(bytes); +} + /* * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings() diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7851f3a1b5f7..0295d89f5445 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1242,9 +1242,9 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) */ static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot) { - unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); + unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(memslot); - memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL_ACCOUNT); + memslot->dirty_bitmap = vcalloc(2, dirty_bytes); if (!memslot->dirty_bitmap) return -ENOMEM; -- 2.27.0