On Mon, Apr 08, 2019 at 11:07:30AM -0700, Ben Gardon wrote:
> KVM bases its memory usage limits on the total number of guest pages
> across all memslots. However, those limits, and the calculations to
> produce, them use 32 bit unsigned integers. This can result in overflow

Nit: I think you wanted the comma after "them".

> if a VM has more guest pages that can be represented by a u32. As a
> result of this overflow, KVM can use a low limit on the number of MMU
> pages it will allocate. This makes KVM unable to map all of guest memory
> at once, prompting spurious faults.
>
> Tested: Ran all kvm-unit-tests on an Intel Haswell machine. This patch
> introduced no new failures.
>
> Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h | 12 ++++++------
>  arch/x86/kvm/mmu.c              | 13 ++++++-------
>  arch/x86/kvm/mmu.h              |  2 +-
>  arch/x86/kvm/x86.c              |  4 ++--
>  4 files changed, 15 insertions(+), 16 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 159b5988292f3..9b7b731a00321 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -126,7 +126,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
>  }
>
>  #define KVM_PERMILLE_MMU_PAGES 20
> -#define KVM_MIN_ALLOC_MMU_PAGES 64
> +#define KVM_MIN_ALLOC_MMU_PAGES 64UL
>  #define KVM_MMU_HASH_SHIFT 12
>  #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
>  #define KVM_MIN_FREE_MMU_PAGES 5
> @@ -844,9 +844,9 @@ enum kvm_irqchip_mode {
>  };
>
>  struct kvm_arch {
> -	unsigned int n_used_mmu_pages;
> -	unsigned int n_requested_mmu_pages;
> -	unsigned int n_max_mmu_pages;
> +	unsigned long n_used_mmu_pages;
> +	unsigned long n_requested_mmu_pages;
> +	unsigned long n_max_mmu_pages;
>  	unsigned int indirect_shadow_pages;

It probably makes sense to change 'indirect_shadow_pages' as well.  I
haven't done the math to know whether or not it can actually overflow,
but 4 bytes per VM seems cheap compared to a bug that causes KVM to think
it doesn't have any shadow pages.  Note, there's also a local variable in
reexecute_instruction() that snapshots 'indirect_shadow_pages'.

Another case that should be changed to an unsigned long is 'lpages' in
kvm_arch_create_memslot().  Overflow there seems inevitable, and it's a
local so there's no memory overhead.
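Tangentially, it might be worth spelling out in the changelog how early
the old math actually falls over.  Rough userspace sketch of the existing
u32 calculation in kvm_mmu_calculate_default_mmu_pages() -- the page
count below is just my pick of "one page past UINT_MAX / 20", i.e. a VM
a hair over ~819 GiB, not anything taken from the patch:

  #include <stdio.h>

  #define KVM_PERMILLE_MMU_PAGES  20
  #define KVM_MIN_ALLOC_MMU_PAGES 64

  int main(void)
  {
          /* ~819 GiB of guest memory: one page past UINT_MAX / 20 */
          unsigned int nr_pages = 214748365U;
          unsigned int nr_mmu_pages;

          /* 214748365 * 20 = 4294967300, which wraps to 4 in a u32 */
          nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
          if (nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
                  nr_mmu_pages = KVM_MIN_ALLOC_MMU_PAGES;

          /* prints 64 instead of the ~4.29M pages the VM actually needs */
          printf("nr_mmu_pages = %u\n", nr_mmu_pages);
          return 0;
  }

I.e. the multiply wraps long before nr_pages itself overflows at 16 TiB,
so a guest in that range can end up clamped to the 64-page minimum.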
>  	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
>  	/*
> @@ -1256,8 +1256,8 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
>  				   gfn_t gfn_offset, unsigned long mask);
>  void kvm_mmu_zap_all(struct kvm *kvm);
>  void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
> -unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
> -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
> +unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
> +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
>
>  int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
>  bool pdptrs_changed(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index eee455a8a612d..bd07270196744 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2007,7 +2007,7 @@ static int is_empty_shadow_page(u64 *spt)
>   * aggregate version in order to make the slab shrinker
>   * faster
>   */
> -static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
> +static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr)
>  {
>  	kvm->arch.n_used_mmu_pages += nr;
>  	percpu_counter_add(&kvm_total_used_mmu_pages, nr);
> @@ -2763,7 +2763,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
>   * Changing the number of mmu pages allocated to the vm
>   * Note: if goal_nr_mmu_pages is too small, you will get dead lock
>   */
> -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
> +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
>  {
>  	LIST_HEAD(invalid_list);
>
> @@ -6031,10 +6031,10 @@ int kvm_mmu_module_init(void)
>  /*
>   * Calculate mmu pages needed for kvm.
>   */
> -unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
> +unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
>  {
> -	unsigned int nr_mmu_pages;
> -	unsigned int nr_pages = 0;
> +	unsigned long nr_mmu_pages;
> +	unsigned long nr_pages = 0;
>  	struct kvm_memslots *slots;
>  	struct kvm_memory_slot *memslot;
>  	int i;
> @@ -6047,8 +6047,7 @@ unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
>  	}
>
>  	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
> -	nr_mmu_pages = max(nr_mmu_pages,
> -			   (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
> +	nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
>
>  	return nr_mmu_pages;
>  }
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index bbdc60f2fae89..54c2a377795be 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -64,7 +64,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
>  int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
>  			  u64 fault_address, char *insn, int insn_len);
>
> -static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
> +static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm)
>  {
>  	if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
>  		return kvm->arch.n_max_mmu_pages -
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 099b851dabafd..455f156f56ede 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4270,7 +4270,7 @@ static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
>  }
>
>  static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
> -					  u32 kvm_nr_mmu_pages)
> +					  unsigned long kvm_nr_mmu_pages)
>  {
>  	if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
>  		return -EINVAL;
> @@ -4284,7 +4284,7 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
>  	return 0;
>  }
>
> -static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
> +static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
>  {
>  	return kvm->arch.n_max_mmu_pages;
>  }
> --
> 2.21.0.392.gf8f6787159e-goog
>
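One small note on the KVM_MIN_ALLOC_MMU_PAGES change for anyone else
reading along: if I'm reading the max() machinery right, the UL suffix
and the dropped '(unsigned int)' cast in
kvm_mmu_calculate_default_mmu_pages() go together, since the kernel's
max() complains when its arguments have different types.  For
completeness, the same back-of-the-envelope sketch as above with the new
unsigned long math (again mine, not from the patch) -- on a 64-bit host
the ~819 GiB guest now gets the limit it should:

  #include <stdio.h>

  #define KVM_PERMILLE_MMU_PAGES  20
  #define KVM_MIN_ALLOC_MMU_PAGES 64UL

  int main(void)
  {
          unsigned long nr_pages = 214748365UL;   /* same ~819 GiB guest */
          unsigned long nr_mmu_pages;

          /* 64-bit math: 4294967300 / 1000, no wrap */
          nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
          if (nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
                  nr_mmu_pages = KVM_MIN_ALLOC_MMU_PAGES;

          /* prints 4294967, i.e. the expected ~4.29M page limit */
          printf("nr_mmu_pages = %lu\n", nr_mmu_pages);
          return 0;
  }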