Re: [PATCH v5 10/14] nEPT: Add nEPT violation/misconfigration support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 07/31/2013 10:48 PM, Gleb Natapov wrote:
> From: Yang Zhang <yang.z.zhang@xxxxxxxxx>
> 
> Inject nEPT fault to L1 guest. This patch is original from Xinhao.
> 
> Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
> Signed-off-by: Xinhao Xu <xinhao.xu@xxxxxxxxx>
> Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
> Signed-off-by: Gleb Natapov <gleb@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |    4 ++++
>  arch/x86/kvm/mmu.c              |   34 ++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/paging_tmpl.h      |   28 ++++++++++++++++++++++++----
>  arch/x86/kvm/vmx.c              |   17 +++++++++++++++++
>  4 files changed, 79 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 531f47c..58a17c0 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -286,6 +286,7 @@ struct kvm_mmu {
>  	u64 *pae_root;
>  	u64 *lm_root;
>  	u64 rsvd_bits_mask[2][4];
> +	u64 bad_mt_xwr;
> 
>  	/*
>  	 * Bitmap: bit set = last pte in walk
> @@ -512,6 +513,9 @@ struct kvm_vcpu_arch {
>  	 * instruction.
>  	 */
>  	bool write_fault_to_shadow_pgtable;
> +
> +	/* set at EPT violation at this point */
> +	unsigned long exit_qualification;
>  };
> 
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 3df3ac3..58ae9db 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3521,6 +3521,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
>  	int maxphyaddr = cpuid_maxphyaddr(vcpu);
>  	u64 exb_bit_rsvd = 0;
> 
> +	context->bad_mt_xwr = 0;
> +
>  	if (!context->nx)
>  		exb_bit_rsvd = rsvd_bits(63, 63);
>  	switch (context->root_level) {
> @@ -3576,6 +3578,38 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
>  	}
>  }
> 
> +static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
> +		struct kvm_mmu *context, bool execonly)
> +{
> +	int maxphyaddr = cpuid_maxphyaddr(vcpu);
> +	int pte;
> +
> +	context->rsvd_bits_mask[0][3] =
> +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
> +	context->rsvd_bits_mask[0][2] =
> +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
> +	context->rsvd_bits_mask[0][1] =
> +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
> +	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
> +
> +	/* large page */
> +	context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
> +	context->rsvd_bits_mask[1][2] =
> +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
> +	context->rsvd_bits_mask[1][1] =
> +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
> +	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
> +	
> +	for (pte = 0; pte < 64; pte++) {
> +		int rwx_bits = pte & 7;
> +		int mt = pte >> 3;
> +		if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
> +				rwx_bits == 0x2 || rwx_bits == 0x6 ||
> +				(rwx_bits == 0x4 && !execonly))
> +			context->bad_mt_xwr |= (1ull << pte);
> +	}
> +}
> +
>  static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
>  {
>  	unsigned bit, byte, pfec;
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 0d25351..ed6773e 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -127,12 +127,13 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
>  	*access &= mask;
>  }
> 
> -static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
> +static bool inline FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte,
> +		int level)

I am not sure this explicit "inline" is needed; GCC already inlines small
static functions.

>  {
> -	int bit7;
> +	int bit7 = (gpte >> 7) & 1, low5 = gpte & 0x3f;
> 
> -	bit7 = (gpte >> 7) & 1;
> -	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
> +	return ((gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0) |
> +		((mmu->bad_mt_xwr & (1ull << low5)) != 0);
>  }
> 
>  static inline int FNAME(is_present_gpte)(unsigned long pte)
> @@ -386,6 +387,25 @@ error:
>  	walker->fault.vector = PF_VECTOR;
>  	walker->fault.error_code_valid = true;
>  	walker->fault.error_code = errcode;
> +
> +#if PTTYPE == PTTYPE_EPT
> +	/*
> +	 * Use PFERR_RSVD_MASK in error_code to to tell if EPT
> +	 * misconfiguration requires to be injected. The detection is
> +	 * done by is_rsvd_bits_set() above.
> +	 *
> +	 * We set up the value of exit_qualification to inject:
> +	 * [2:0] - Derive from [2:0] of real exit_qualification at EPT violation
> +	 * [5:3] - Calculated by the page walk of the guest EPT page tables
> +	 * [7:8] - Clear to 0.

I do not understand why bits 7 and 8 are always cleared, especially since bit 7
is always set in the normal case. The SDM says this about bit 7:
The guest linear-address field is valid for all EPT violations except those
resulting from an attempt to load the guest PDPTEs as part of the execution of
the MOV CR instruction.

So, with bit 7 always clear, L0 always tells L1 that the fault was caused by an
attempt to load "the guest PDPTEs as part of the execution of the MOV CR
instruction".

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux