On Mon, Apr 08, 2019 at 11:07:30AM -0700, Ben Gardon wrote:
> KVM bases its memory usage limits on the total number of guest pages
> across all memslots. However, those limits, and the calculations to
> produce, them use 32 bit unsigned integers. This can result in overflow

Nit: I think you wanted the comma after "them".

> if a VM has more guest pages that can be represented by a u32. As a
> result of this overflow, KVM can use a low limit on the number of MMU
> pages it will allocate. This makes KVM unable to map all of guest memory
> at once, prompting spurious faults.
>
> Tested: Ran all kvm-unit-tests on an Intel Haswell machine. This patch
> introduced no new failures.
>
> Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h | 12 ++++++------
>  arch/x86/kvm/mmu.c              | 13 ++++++-------
>  arch/x86/kvm/mmu.h              |  2 +-
>  arch/x86/kvm/x86.c              |  4 ++--
>  4 files changed, 15 insertions(+), 16 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 159b5988292f3..9b7b731a00321 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -126,7 +126,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
>  }
>
>  #define KVM_PERMILLE_MMU_PAGES 20
> -#define KVM_MIN_ALLOC_MMU_PAGES 64
> +#define KVM_MIN_ALLOC_MMU_PAGES 64UL
>  #define KVM_MMU_HASH_SHIFT 12
>  #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
>  #define KVM_MIN_FREE_MMU_PAGES 5
> @@ -844,9 +844,9 @@ enum kvm_irqchip_mode {
>  };
>
>  struct kvm_arch {
> -	unsigned int n_used_mmu_pages;
> -	unsigned int n_requested_mmu_pages;
> -	unsigned int n_max_mmu_pages;
> +	unsigned long n_used_mmu_pages;
> +	unsigned long n_requested_mmu_pages;
> +	unsigned long n_max_mmu_pages;
>  	unsigned int indirect_shadow_pages;

It probably makes sense to change 'indirect_shadow_pages' as well.  I
haven't done the math to know whether or not it can actually overflow,
but 4 bytes per VM seems cheap compared to a bug that causes KVM to think
it doesn't have any shadow pages.  Note, there's also a local variable in
reexecute_instruction() that snapshots 'indirect_shadow_pages'.

Another case that should be changed to an unsigned long is 'lpages' in
kvm_arch_create_memslot().  Overflow there seems inevitable, and it's a
local so there's no memory overhead.
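Tangentially, it might be worth spelling out in the changelog how early
the old math actually falls over.  Rough userspace sketch of the existing
u32 calculation in kvm_mmu_calculate_default_mmu_pages() -- the page
count below is just my pick of "one page past UINT_MAX / 20", i.e. a VM
a hair over ~819 GiB, not anything taken from the patch:

  #include <stdio.h>

  #define KVM_PERMILLE_MMU_PAGES  20
  #define KVM_MIN_ALLOC_MMU_PAGES 64

  int main(void)
  {
          /* ~819 GiB of guest memory: one page past UINT_MAX / 20 */
          unsigned int nr_pages = 214748365U;
          unsigned int nr_mmu_pages;

          /* 214748365 * 20 = 4294967300, which wraps to 4 in a u32 */
          nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
          if (nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
                  nr_mmu_pages = KVM_MIN_ALLOC_MMU_PAGES;

          /* prints 64 instead of the ~4.29M pages the VM actually needs */
          printf("nr_mmu_pages = %u\n", nr_mmu_pages);
          return 0;
  }

I.e. the multiply wraps long before nr_pages itself overflows at 16 TiB,
so a guest in that range can end up clamped to the 64-page minimum.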
>  	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
>  	/*
> @@ -1256,8 +1256,8 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
>  				   gfn_t gfn_offset, unsigned long mask);
>  void kvm_mmu_zap_all(struct kvm *kvm);
>  void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
> -unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
> -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
> +unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
> +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
>
>  int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
>  bool pdptrs_changed(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index eee455a8a612d..bd07270196744 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2007,7 +2007,7 @@ static int is_empty_shadow_page(u64 *spt)
>   * aggregate version in order to make the slab shrinker
>   * faster
>   */
> -static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
> +static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr)
>  {
>  	kvm->arch.n_used_mmu_pages += nr;
>  	percpu_counter_add(&kvm_total_used_mmu_pages, nr);
> @@ -2763,7 +2763,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
>   * Changing the number of mmu pages allocated to the vm
>   * Note: if goal_nr_mmu_pages is too small, you will get dead lock
>   */
> -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
> +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
>  {
>  	LIST_HEAD(invalid_list);
>
> @@ -6031,10 +6031,10 @@ int kvm_mmu_module_init(void)
>  /*
>   * Calculate mmu pages needed for kvm.
>   */
> -unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
> +unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
>  {
> -	unsigned int nr_mmu_pages;
> -	unsigned int nr_pages = 0;
> +	unsigned long nr_mmu_pages;
> +	unsigned long nr_pages = 0;
>  	struct kvm_memslots *slots;
>  	struct kvm_memory_slot *memslot;
>  	int i;
> @@ -6047,8 +6047,7 @@ unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
>  	}
>
>  	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
> -	nr_mmu_pages = max(nr_mmu_pages,
> -			   (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
> +	nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
>
>  	return nr_mmu_pages;
>  }
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index bbdc60f2fae89..54c2a377795be 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -64,7 +64,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
>  int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
>  			  u64 fault_address, char *insn, int insn_len);
>
> -static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
> +static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm)
>  {
>  	if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
>  		return kvm->arch.n_max_mmu_pages -
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 099b851dabafd..455f156f56ede 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4270,7 +4270,7 @@ static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
>  }
>
>  static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
> -					  u32 kvm_nr_mmu_pages)
> +					  unsigned long kvm_nr_mmu_pages)
>  {
>  	if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
>  		return -EINVAL;
> @@ -4284,7 +4284,7 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
>  	return 0;
>  }
>
> -static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
> +static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
>  {
>  	return kvm->arch.n_max_mmu_pages;
>  }
> --
> 2.21.0.392.gf8f6787159e-goog
>
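One small note on the KVM_MIN_ALLOC_MMU_PAGES change for anyone else
reading along: if I'm reading the max() machinery right, the UL suffix
and the dropped '(unsigned int)' cast in
kvm_mmu_calculate_default_mmu_pages() go together, since the kernel's
max() complains when its arguments have different types.  For
completeness, the same back-of-the-envelope sketch as above with the new
unsigned long math (again mine, not from the patch) -- on a 64-bit host
the ~819 GiB guest now gets the limit it should:

  #include <stdio.h>

  #define KVM_PERMILLE_MMU_PAGES  20
  #define KVM_MIN_ALLOC_MMU_PAGES 64UL

  int main(void)
  {
          unsigned long nr_pages = 214748365UL;   /* same ~819 GiB guest */
          unsigned long nr_mmu_pages;

          /* 64-bit math: 4294967300 / 1000, no wrap */
          nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
          if (nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
                  nr_mmu_pages = KVM_MIN_ALLOC_MMU_PAGES;

          /* prints 4294967, i.e. the expected ~4.29M page limit */
          printf("nr_mmu_pages = %lu\n", nr_mmu_pages);
          return 0;
  }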