Re: [PATCH 04/10] nEPT: Fix page table format in nested EPT

On 11/10/2011 11:59 AM, Nadav Har'El wrote:
> When the existing KVM MMU code creates a shadow page table, it assumes it
> has the normal x86 page table format. This is obviously correct for normal
> shadow page tables, and also correct for AMD's NPT.
> Unfortunately, Intel's EPT page tables differ in subtle ways from ordinary
> page tables, so when we create a shadow EPT table (i.e., in nested EPT),
> we need to slightly modify the way in which this table is built.
> 
> In particular, when mmu.c's link_shadow_page() creates non-leaf page table
> entries, it used to enable the "present", "accessed", "writable" and "user"
> flags on these entries. While this is correct for ordinary page tables, it
> is wrong in EPT tables - where these bits actually have completely different
> meaning (compare PT_*_MASK from mmu.h to VMX_EPT_*_MASK from vmx.h).
> In particular, leaving the code as-is causes bit 5 of the PTE to be turned on
> (supposedly for PT_ACCESSED_MASK), which is a reserved bit in EPT and causes
> an "EPT Misconfiguration" failure.
> 
> So we must move link_shadow_page's list of extra bits to a new mmu context
> field, which is set differently for nested EPT.
> 
> Signed-off-by: Nadav Har'El <nyh@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |    1 +
>  arch/x86/kvm/mmu.c              |   16 +++++++++-------
>  arch/x86/kvm/paging_tmpl.h      |    6 ++++--
>  arch/x86/kvm/vmx.c              |    3 +++
>  4 files changed, 17 insertions(+), 9 deletions(-)
> 
> --- .before/arch/x86/include/asm/kvm_host.h	2011-11-10 11:33:59.000000000 +0200
> +++ .after/arch/x86/include/asm/kvm_host.h	2011-11-10 11:33:59.000000000 +0200
> @@ -287,6 +287,7 @@ struct kvm_mmu {
>  	bool nx;
>  
>  	u64 pdptrs[4]; /* pae */
> +	u64 link_shadow_page_set_bits;
>  };
>  
>  struct kvm_vcpu_arch {
> --- .before/arch/x86/kvm/vmx.c	2011-11-10 11:33:59.000000000 +0200
> +++ .after/arch/x86/kvm/vmx.c	2011-11-10 11:33:59.000000000 +0200
> @@ -6485,6 +6485,9 @@ static int nested_ept_init_mmu_context(s
>  	vcpu->arch.mmu.get_pdptr         = nested_ept_get_pdptr;
>  	vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
>  	vcpu->arch.mmu.shadow_root_level = get_ept_level();
> +	vcpu->arch.mmu.link_shadow_page_set_bits =
> +		VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK |
> +		VMX_EPT_EXECUTABLE_MASK;
>  
>  	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
>  
> --- .before/arch/x86/kvm/mmu.c	2011-11-10 11:33:59.000000000 +0200
> +++ .after/arch/x86/kvm/mmu.c	2011-11-10 11:33:59.000000000 +0200
> @@ -1782,14 +1782,9 @@ static void shadow_walk_next(struct kvm_
>  	return __shadow_walk_next(iterator, *iterator->sptep);
>  }
>  
> -static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
> +static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, u64 set_bits)
>  {
> -	u64 spte;
> -
> -	spte = __pa(sp->spt)
> -		| PT_PRESENT_MASK | PT_ACCESSED_MASK
> -		| PT_WRITABLE_MASK | PT_USER_MASK;
> -	mmu_spte_set(sptep, spte);
> +	mmu_spte_set(sptep, __pa(sp->spt) | set_bits);
>  }
>  
>  static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
> @@ -3366,6 +3361,13 @@ int kvm_init_shadow_mmu(struct kvm_vcpu 
>  	vcpu->arch.mmu.base_role.cr0_wp  = is_write_protection(vcpu);
>  	vcpu->arch.mmu.base_role.smep_andnot_wp
>  		= smep && !is_write_protection(vcpu);
> +	/*
> +	 * link_shadow_page() should apply these bits in shadow page tables,
> +	 * and in shadow NPT tables (nested NPT). For nested EPT, different
> +	 * bits apply.
> +	 */
> +	vcpu->arch.mmu.link_shadow_page_set_bits = PT_PRESENT_MASK |
> +		PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
>  
>  	return r;
>  }
> --- .before/arch/x86/kvm/paging_tmpl.h	2011-11-10 11:33:59.000000000 +0200
> +++ .after/arch/x86/kvm/paging_tmpl.h	2011-11-10 11:33:59.000000000 +0200
> @@ -515,7 +515,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu
>  			goto out_gpte_changed;
>  
>  		if (sp)
> -			link_shadow_page(it.sptep, sp);
> +			link_shadow_page(it.sptep, sp,
> +				vcpu->arch.mmu.link_shadow_page_set_bits);
>  	}
>  
>  	for (;
> @@ -535,7 +536,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu
>  
>  		sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
>  				      true, direct_access, it.sptep);
> -		link_shadow_page(it.sptep, sp);
> +		link_shadow_page(it.sptep, sp,
> +			vcpu->arch.mmu.link_shadow_page_set_bits);
>  	}
>  
>  	clear_sp_write_flooding_count(it.sptep);
We need to take the permissions in L1's EPT table into account: what if an entry there is read-only? With this patch, link_shadow_page() still sets the shadow entry readable, writable and executable unconditionally.


