On Thu, Aug 01, 2013 at 04:31:31PM +0800, Xiao Guangrong wrote:
> On 07/31/2013 10:48 PM, Gleb Natapov wrote:
> > From: Yang Zhang <yang.z.zhang@xxxxxxxxx>
> > 
> > Inject nEPT fault to L1 guest. This patch is originally from Xinhao.
> > 
> > Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
> > Signed-off-by: Xinhao Xu <xinhao.xu@xxxxxxxxx>
> > Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
> > Signed-off-by: Gleb Natapov <gleb@xxxxxxxxxx>
> > ---
> >  arch/x86/include/asm/kvm_host.h |  4 ++++
> >  arch/x86/kvm/mmu.c              | 34 ++++++++++++++++++++++++++++++++++
> >  arch/x86/kvm/paging_tmpl.h      | 28 ++++++++++++++++++++++++----
> >  arch/x86/kvm/vmx.c              | 17 +++++++++++++++++
> >  4 files changed, 79 insertions(+), 4 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 531f47c..58a17c0 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -286,6 +286,7 @@ struct kvm_mmu {
> >  	u64 *pae_root;
> >  	u64 *lm_root;
> >  	u64 rsvd_bits_mask[2][4];
> > +	u64 bad_mt_xwr;
> >  
> >  	/*
> >  	 * Bitmap: bit set = last pte in walk
> > @@ -512,6 +513,9 @@ struct kvm_vcpu_arch {
> >  	 * instruction.
> >  	 */
> >  	bool write_fault_to_shadow_pgtable;
> > +
> > +	/* set at EPT violation at this point */
> > +	unsigned long exit_qualification;
> >  };
> >  
> >  struct kvm_lpage_info {
> > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> > index 3df3ac3..58ae9db 100644
> > --- a/arch/x86/kvm/mmu.c
> > +++ b/arch/x86/kvm/mmu.c
> > @@ -3521,6 +3521,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
> >  	int maxphyaddr = cpuid_maxphyaddr(vcpu);
> >  	u64 exb_bit_rsvd = 0;
> >  
> > +	context->bad_mt_xwr = 0;
> > +
> >  	if (!context->nx)
> >  		exb_bit_rsvd = rsvd_bits(63, 63);
> >  	switch (context->root_level) {
> > @@ -3576,6 +3578,38 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
> >  	}
> >  }
> >  
> > +static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
> > +		struct kvm_mmu *context, bool execonly)
> > +{
> > +	int maxphyaddr = cpuid_maxphyaddr(vcpu);
> > +	int pte;
> > +
> > +	context->rsvd_bits_mask[0][3] =
> > +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
> > +	context->rsvd_bits_mask[0][2] =
> > +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
> > +	context->rsvd_bits_mask[0][1] =
> > +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
> > +	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
> > +
> > +	/* large page */
> > +	context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
> > +	context->rsvd_bits_mask[1][2] =
> > +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
> > +	context->rsvd_bits_mask[1][1] =
> > +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
> > +	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
> > +
> > +	for (pte = 0; pte < 64; pte++) {
> > +		int rwx_bits = pte & 7;
> > +		int mt = pte >> 3;
> > +		if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
> > +		    rwx_bits == 0x2 || rwx_bits == 0x6 ||
> > +		    (rwx_bits == 0x4 && !execonly))
> > +			context->bad_mt_xwr |= (1ull << pte);
> > +	}
> > +}
> > +
> >  static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
> >  {
> >  	unsigned bit, byte, pfec;
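As an aside for readers of the archive: the loop above encodes the EPT
misconfiguration rules -- memory types 2, 3 and 7 are reserved, the
write-only (010) and write+execute (110) XWR combinations are always
illegal, and execute-only (100) is illegal unless the CPU supports it.
Below is a minimal userspace sketch of the same classification; the
function name and the test harness are illustrative only, not part of
the patch:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Build a 64-entry bitmap indexed by bits 5:0 of an EPT PTE:
	 * XWR in bits 2:0, memory type in bits 5:3.  A set bit means
	 * that combination is an EPT misconfiguration. */
	static uint64_t build_bad_mt_xwr(bool execonly)
	{
		uint64_t bad_mt_xwr = 0;
		int pte;

		for (pte = 0; pte < 64; pte++) {
			int rwx_bits = pte & 7;	/* Read/Write/eXecute */
			int mt = pte >> 3;	/* EPT memory type */

			if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
			    rwx_bits == 0x2 || rwx_bits == 0x6 ||
			    (rwx_bits == 0x4 && !execonly))
				bad_mt_xwr |= 1ull << pte;
		}
		return bad_mt_xwr;
	}

	int main(void)
	{
		uint64_t map = build_bad_mt_xwr(false);

		/* WB (mt=6) with rwx=111 -> index 0x37, a valid entry */
		printf("WB rwx=111 bad? %d\n", (int)((map >> 0x37) & 1));
		/* reserved mt=2 with rwx=001 -> index 0x11, flagged */
		printf("mt=2 rwx=001 bad? %d\n", (int)((map >> 0x11) & 1));
		return 0;
	}

Precomputing the bitmap this way lets the walker reject a misconfigured
gpte with a single shift and AND instead of re-deriving the SDM rules on
every page walk.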
> > diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> > index 0d25351..ed6773e 100644
> > --- a/arch/x86/kvm/paging_tmpl.h
> > +++ b/arch/x86/kvm/paging_tmpl.h
> > @@ -127,12 +127,13 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
> >  	*access &= mask;
> >  }
> >  
> > -static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
> > +static bool inline FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte,
> > +					   int level)
> 
> Not sure this explicit "inline" is needed... Gcc always inlines small
> static functions.
> 
Paolo asked for it, but now I see that I did it in the wrong patch. I do
not care much personally either way; I agree with you though, the
compiler will inline it anyway.

> >  {
> > -	int bit7;
> > +	int bit7 = (gpte >> 7) & 1, low5 = gpte & 0x3f;
> >  
> > -	bit7 = (gpte >> 7) & 1;
> > -	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
> > +	return ((gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0) |
> > +	       ((mmu->bad_mt_xwr & (1ull << low5)) != 0);
> >  }
> >  
> >  static inline int FNAME(is_present_gpte)(unsigned long pte)
> > @@ -386,6 +387,25 @@ error:
> >  	walker->fault.vector = PF_VECTOR;
> >  	walker->fault.error_code_valid = true;
> >  	walker->fault.error_code = errcode;
> > +
> > +#if PTTYPE == PTTYPE_EPT
> > +	/*
> > +	 * Use PFERR_RSVD_MASK in error_code to tell if an EPT
> > +	 * misconfiguration needs to be injected. The detection is
> > +	 * done by is_rsvd_bits_set() above.
> > +	 *
> > +	 * We set up the value of exit_qualification to inject:
> > +	 * [2:0] - Derive from [2:0] of real exit_qualification at EPT violation
> > +	 * [5:3] - Calculated by the page walk of the guest EPT page tables
> > +	 * [7:8] - Clear to 0.
> 
> I do not know why bits 7 and 8 are always cleared, especially since bit 7
> is always set in the normal case. The SDM says this about bit 7:
> 
>     The guest linear-address field is valid for all EPT violations except
>     those resulting from an attempt to load the guest PDPTEs as part of
>     the execution of the MOV CR instruction.
> 
> So with bit 7 always clear, L0 always tells L1 that the fault was caused
> by loading "the guest PDPTEs as part of the execution of the MOV CR
> instruction".

You are right, it looks like we need to copy bits 7:8 from the real
exit_qualification.

--
			Gleb.
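To make that conclusion concrete, here is a hedged sketch of how the
injected exit_qualification could be composed once bits 7:8 are copied
from the real one. The helper name and its parameters are assumptions
for illustration, not the final kernel code:

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Compose the exit_qualification injected into L1 for a nested
	 * EPT violation:
	 *   bits 2:0 - access type (read/write/fetch) of the real exit
	 *   bits 5:3 - permissions found by the walk of L1's EPT tables
	 *   bits 8:7 - copied from the real exit_qualification rather
	 *              than cleared, per the discussion above
	 */
	static uint64_t nested_ept_exit_qualification(uint64_t real_exit_qual,
						      uint64_t walk_pte_access)
	{
		uint64_t qual = 0;

		qual |= real_exit_qual & 0x7;		/* bits 2:0 */
		qual |= (walk_pte_access & 0x7) << 3;	/* bits 5:3 */
		qual |= real_exit_qual & (3ull << 7);	/* bits 8:7 */

		return qual;
	}

	int main(void)
	{
		/* read fault (bit 0) with bits 7:8 set; walk saw rw=011 */
		uint64_t q = nested_ept_exit_qualification(0x181, 0x3);

		printf("qual=%#llx\n", (unsigned long long)q);	/* 0x199 */
		return 0;
	}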