On 31/10/18 22:53, Junaid Shahid wrote:
> When a guest page table is updated via an emulated write,
> kvm_mmu_pte_write() is called to update the shadow PTE using the just
> written guest PTE value. But if two emulated guest PTE writes happened
> concurrently, it is possible that the guest PTE and the shadow PTE end
> up being out of sync. Emulated writes do not mark the shadow page as
> unsync-ed, so this inconsistency will not be resolved even by a guest TLB
> flush (unless the page was marked as unsync-ed at some other point).
>
> This is fixed by re-reading the current value of the guest PTE after the
> MMU lock has been acquired instead of just using the value that was
> written prior to calling kvm_mmu_pte_write().
>
> Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx>
> ---
>  arch/x86/kvm/mmu.c | 27 +++++++++------------------
>  1 file changed, 9 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 4cf43ce42959..c2afe7feb7c7 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -5083,9 +5083,9 @@ static bool need_remote_flush(u64 old, u64 new)
>  }
>  
>  static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
> -				    const u8 *new, int *bytes)
> +				    int *bytes)
>  {
> -	u64 gentry;
> +	u64 gentry = 0;
>  	int r;
>  
>  	/*
> @@ -5097,22 +5097,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
>  		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
>  		*gpa &= ~(gpa_t)7;
>  		*bytes = 8;
> -		r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8);
> -		if (r)
> -			gentry = 0;
> -		new = (const u8 *)&gentry;
>  	}
>  
> -	switch (*bytes) {
> -	case 4:
> -		gentry = *(const u32 *)new;
> -		break;
> -	case 8:
> -		gentry = *(const u64 *)new;
> -		break;
> -	default:
> -		gentry = 0;
> -		break;
> +	if (*bytes == 4 || *bytes == 8) {
> +		r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes);
> +		if (r)
> +			gentry = 0;
>  	}
>  
>  	return gentry;
> @@ -5216,8 +5206,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>  
>  	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
>  
> -	gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
> -
>  	/*
>  	 * No need to care whether allocation memory is successful
>  	 * or not since pte prefetch is skiped if it does not have
> @@ -5226,6 +5214,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>  	mmu_topup_memory_caches(vcpu);
>  
>  	spin_lock(&vcpu->kvm->mmu_lock);
> +
> +	gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
> +
>  	++vcpu->kvm->stat.mmu_pte_write;
>  	kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
>  

Queued, thanks.

Paolo
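
[Editorial sketch, not part of the thread above.] The race being fixed can be reproduced in miniature in plain userspace C, with two pthreads standing in for the racing vCPUs and a mutex standing in for kvm->mmu_lock. This is illustrative only, not KVM code; guest_pte, shadow_pte, pte_write_buggy() and pte_write_fixed() are invented names for this sketch.

/* race_sketch.c - userspace analogue of the emulated-PTE-write race. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t guest_pte;	/* stand-in for the guest PTE in guest memory */
static uint64_t shadow_pte;		/* stand-in for the shadow PTE; mmu_lock protects it */
static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Old ordering: the shadow PTE is derived from the value this emulated
 * write stored, captured before the lock. If writes A and B hit
 * guest_pte in one order but take mmu_lock in the other, shadow_pte
 * ends up holding the loser's value while guest_pte holds the winner's.
 */
static void pte_write_buggy(uint64_t val)
{
	atomic_store(&guest_pte, val);		/* the emulated guest write */
	pthread_mutex_lock(&mmu_lock);
	shadow_pte = val;			/* stale if another write raced in above */
	pthread_mutex_unlock(&mmu_lock);
}

/*
 * Patched ordering: re-read the current guest PTE once the lock is held,
 * so whichever update runs last copies what the guest PTE holds *now*.
 */
static void pte_write_fixed(uint64_t val)
{
	atomic_store(&guest_pte, val);
	pthread_mutex_lock(&mmu_lock);
	shadow_pte = atomic_load(&guest_pte);	/* analogue of the re-fetch under the lock */
	pthread_mutex_unlock(&mmu_lock);
}

static void *writer(void *arg)
{
	pte_write_buggy((uint64_t)(uintptr_t)arg);	/* swap in pte_write_fixed() to compare */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, writer, (void *)(uintptr_t)0x1111);
	pthread_create(&b, NULL, writer, (void *)(uintptr_t)0x2222);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/* With pte_write_buggy() the two values can disagree; with
	 * pte_write_fixed() they always match once both writers finish. */
	printf("guest_pte=%#llx shadow_pte=%#llx%s\n",
	       (unsigned long long)atomic_load(&guest_pte),
	       (unsigned long long)shadow_pte,
	       atomic_load(&guest_pte) == shadow_pte ? "" : "  <-- out of sync");
	return 0;
}

pte_write_buggy() mirrors the old flow, where mmu_pte_write_fetch_gpte() decoded the pre-lock value; pte_write_fixed() mirrors the patched flow, where the fetch happens only after mmu_lock is taken. That move is also why the patch switches to kvm_vcpu_read_guest_atomic(): the read now runs under a spinlock, where a sleeping read would not be allowed.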