On 22/06/19 19:42, Jiří Paleček wrote:
> On AMD processors, in PAE 32-bit mode, nested KVM instances don't
> work. The L0 host gets a kernel oops, which is related to
> arch.mmu->pae_root being NULL.
>
> The reason for this is that when setting up a nested KVM instance,
> arch.mmu is set to &arch.guest_mmu (while normally it would be
> &arch.root_mmu). However, the initialization and allocation of
> pae_root only creates it in root_mmu. KVM code (e.g. in
> mmu_alloc_shadow_roots) then accesses arch.mmu->pae_root, which is the
> unallocated arch.guest_mmu->pae_root.
>
> This fix simply allocates (and frees) pae_root in both guest_mmu and
> root_mmu (and also lm_root if it was allocated). The allocation is
> subject to the previous restrictions, i.e. it won't allocate anything
> on 64-bit and, AFAIK, not on Intel.
>
> See bug 203923 for details.
>
> Signed-off-by: Jiri Palecek <jpalecek@xxxxxx>
> Tested-by: Jiri Palecek <jpalecek@xxxxxx>
>
> ---
>  arch/x86/kvm/mmu.c | 30 ++++++++++++++++++++++--------
>  1 file changed, 22 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 24843cf49579..efa8285bb56d 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -5592,13 +5592,13 @@ slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
>  				 PT_PAGE_TABLE_LEVEL, lock_flush_tlb);
>  }
>
> -static void free_mmu_pages(struct kvm_vcpu *vcpu)
> +static void free_mmu_pages(struct kvm_mmu *mmu)
>  {
> -	free_page((unsigned long)vcpu->arch.mmu->pae_root);
> -	free_page((unsigned long)vcpu->arch.mmu->lm_root);
> +	free_page((unsigned long)mmu->pae_root);
> +	free_page((unsigned long)mmu->lm_root);
>  }
>
> -static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
> +static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
>  {
>  	struct page *page;
>  	int i;
> @@ -5619,9 +5619,9 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
>  	if (!page)
>  		return -ENOMEM;
>
> -	vcpu->arch.mmu->pae_root = page_address(page);
> +	mmu->pae_root = page_address(page);
>  	for (i = 0; i < 4; ++i)
> -		vcpu->arch.mmu->pae_root[i] = INVALID_PAGE;
> +		mmu->pae_root[i] = INVALID_PAGE;
>
>  	return 0;
>  }
> @@ -5629,6 +5629,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
>  int kvm_mmu_create(struct kvm_vcpu *vcpu)
>  {
>  	uint i;
> +	int ret;
>
>  	vcpu->arch.mmu = &vcpu->arch.root_mmu;
>  	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
> @@ -5646,7 +5647,19 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
>  		vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
>
>  	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
> -	return alloc_mmu_pages(vcpu);
> +
> +	ret = alloc_mmu_pages(vcpu, &vcpu->arch.guest_mmu);
> +	if (ret)
> +		return ret;
> +
> +	ret = alloc_mmu_pages(vcpu, &vcpu->arch.root_mmu);
> +	if (ret)
> +		goto fail_allocate_root;
> +
> +	return ret;
> + fail_allocate_root:
> +	free_mmu_pages(&vcpu->arch.guest_mmu);
> +	return ret;
>  }
>
>  static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
> @@ -6102,7 +6115,8 @@ unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
>  void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
>  {
>  	kvm_mmu_unload(vcpu);
> -	free_mmu_pages(vcpu);
> +	free_mmu_pages(&vcpu->arch.root_mmu);
> +	free_mmu_pages(&vcpu->arch.guest_mmu);
>  	mmu_free_memory_caches(vcpu);
>  }
>
> --
> 2.23.0.rc1
>

Queued, thanks.  The goto usage is somewhat borderline, but not unheard of.

Paolo
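
[For readers coming here from bug 203923: the sketch below is a small,
self-contained userspace model of the failure described in the commit
message, not kernel code. "struct mmu", "struct vcpu_arch", and the
alloc helper are simplified stand-ins for the kernel's kvm_mmu and
kvm_vcpu_arch; the real kernel performs no NULL check at the
corresponding point and oopses at the dereference instead.]

/* Illustrative userspace model only; simplified stand-ins for the
 * kernel's kvm_mmu / kvm_vcpu_arch. Build with: cc -o pae-model pae-model.c
 */
#include <stdio.h>
#include <stdlib.h>

#define INVALID_PAGE ((unsigned long)-1)

struct mmu {
	unsigned long *pae_root;   /* stays NULL unless explicitly allocated */
};

struct vcpu_arch {
	struct mmu *mmu;           /* the currently active context */
	struct mmu root_mmu;       /* used for ordinary (L1) guests */
	struct mmu guest_mmu;      /* used when running a nested (L2) guest */
};

static int alloc_mmu_pages(struct mmu *mmu)
{
	mmu->pae_root = calloc(4, sizeof(*mmu->pae_root));
	if (!mmu->pae_root)
		return -1;
	for (int i = 0; i < 4; ++i)
		mmu->pae_root[i] = INVALID_PAGE;
	return 0;
}

int main(void)
{
	struct vcpu_arch arch = { 0 };

	/* Pre-fix behaviour: only root_mmu ever gets a pae_root. */
	if (alloc_mmu_pages(&arch.root_mmu))
		return 1;

	arch.mmu = &arch.root_mmu;
	printf("root_mmu:  pae_root[0] = %#lx\n", arch.mmu->pae_root[0]);

	/* Setting up a nested guest switches the active context ... */
	arch.mmu = &arch.guest_mmu;

	/* ... and the next pae_root access goes through a NULL pointer.
	 * The kernel has no guard like this and oopses at this point. */
	if (!arch.mmu->pae_root) {
		fprintf(stderr, "guest_mmu.pae_root is NULL -- the oops path\n");
		return 1;
	}
	printf("guest_mmu: pae_root[0] = %#lx\n", arch.mmu->pae_root[0]);

	free(arch.root_mmu.pae_root);
	return 0;
}

[In this model, the patch's fix corresponds to calling alloc_mmu_pages()
for both contexts up front, so whichever struct arch.mmu points at
always has a valid pae_root.]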