On 11/13/2011 04:32 PM, Avi Kivity wrote:
> On 11/13/2011 01:30 PM, Orit Wasserman wrote:
>> Maybe this patch can help; this is roughly what Avi wants (I hope), done very quickly.
>> I'm sorry, I don't have a setup to run nested VMX at the moment, so I can't test it.
>>
>> Orit
>>
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 9335e1b..bbe212f 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -3180,6 +3180,10 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
>>  #include "paging_tmpl.h"
>>  #undef PTTYPE
>>
>> +#define PTTYPE EPT
>> +#include "paging_tmpl.h"
>> +#undef PTTYPE
>> +
>
> Yes, that's the key.
>
>
>>  int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
>>  void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
>>  int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
>> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
>> index 507e2b8..70d4cfd 100644
>> --- a/arch/x86/kvm/paging_tmpl.h
>> +++ b/arch/x86/kvm/paging_tmpl.h
>> @@ -39,6 +39,21 @@
>>  	#define CMPXCHG cmpxchg64
>>  	#define PT_MAX_FULL_LEVELS 2
>>  	#endif
>> +#elif PTTYPE == EPT
>> +	#define pt_element_t u64
>> +	#define FNAME(name) EPT_##name
>> +	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
>> +	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
>> +	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
>> +	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
>> +	#define PT_LEVEL_BITS PT64_LEVEL_BITS
>> +	#ifdef CONFIG_X86_64
>> +	#define PT_MAX_FULL_LEVELS 4
>> +	#define CMPXCHG cmpxchg
>> +	#else
>> +	#define CMPXCHG cmpxchg64
>> +	#define PT_MAX_FULL_LEVELS 2
>> +	#endif
>
> The various masks should be defined here, to avoid lots of #ifdefs later.
>

That's what I did first, but then I was afraid that the masks would be
changed for mmu.c too, so I decided on #ifdefs. The more I think about
it, the more I think we need wrapper functions for mask checking (at
least for this file). What do you think?

>>  #elif PTTYPE == 32
>>  #define pt_element_t u32
>>  #define guest_walker guest_walker32
>> @@ -106,14 +121,19 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte,
>>  {
>>  	unsigned access;
>>
>> +#if PTTYPE == EPT
>> +	access = (gpte & EPT_WRITABLE_MASK) | EPT_EXEC_MASK;
>> +#else
>>  	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
>>  	if (last && !is_dirty_gpte(gpte))
>>  		access &= ~ACC_WRITE_MASK;
>> +#endif
>
> Like here, you could make is_dirty_gpte() local to paging_tmpl.h,
> returning true for EPT and the dirty bit otherwise.
>
>
>>
>>  #if PTTYPE == 64
>>  	if (vcpu->arch.mmu.nx)
>>  		access &= ~(gpte >> PT64_NX_SHIFT);
>
> The EPT X bit is lost.
>
> Could do something like
>
>   access &= (gpte >> PT_X_NX_SHIFT) ^ PT_X_NX_SENSE;
>
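To make the wrapper idea concrete, something like this inside
paging_tmpl.h is what I have in mind (an untested sketch;
is_exec_gpte() is a name I just made up, and a real version would also
need to honor vcpu->arch.mmu.nx):

/* One copy per PTTYPE instantiation of paging_tmpl.h, so the
 * callers stay free of #ifdefs.
 */
static bool FNAME(is_dirty_gpte)(pt_element_t gpte)
{
#if PTTYPE == EPT
	return true;			/* EPT has no dirty bit */
#else
	return gpte & PT_DIRTY_MASK;
#endif
}

static bool FNAME(is_exec_gpte)(pt_element_t gpte)
{
#if PTTYPE == EPT
	return gpte & EPT_EXEC_MASK;	/* X bit, positive sense */
#else
	/* NX bit, inverted sense; bit 63 is never set in a u32 pte,
	 * so 32-bit non-PAE gptes are always executable.
	 */
	return !(gpte & PT64_NX_MASK);
#endif
}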
>> +#if PTTYPE == EPT
>> +	const int write_fault = access & EPT_WRITABLE_MASK;
>> +	const int user_fault = 0;
>> +	const int fetch_fault = 0;
>> +#else
>
> EPT has fetch permissions (but not user permissions); anyway
> translate_nested_gpa() already does this.
>
>>  	const int write_fault = access & PFERR_WRITE_MASK;
>>  	const int user_fault = access & PFERR_USER_MASK;
>>  	const int fetch_fault = access & PFERR_FETCH_MASK;
>> +#endif
>>  	u16 errcode = 0;
>>
>>  	trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
>> @@ -174,6 +200,9 @@ retry_walk:
>>  	       (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
>>
>>  	pt_access = ACC_ALL;
>> +#if PTTYPE == EPT
>> +	pt_access = PT_PRESENT_MASK | EPT_WRITABLE_MASK | EPT_EXEC_MASK;
>> +#endif
>
> pt_access is not in EPT or ia32 format - it's our own format (xwu). So
> this doesn't need changing. Updating gpte_access() is sufficient.
>
>>
>>  	for (;;) {
>>  		gfn_t real_gfn;
>> @@ -186,9 +215,14 @@ retry_walk:
>>  		pte_gpa = gfn_to_gpa(table_gfn) + offset;
>>  		walker->table_gfn[walker->level - 1] = table_gfn;
>>  		walker->pte_gpa[walker->level - 1] = pte_gpa;
>> -
>> +#if PTTYPE == EPT
>> +		real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
>> +					      EPT_WRITABLE_MASK);
>> +#else
>>  		real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
>>  					      PFERR_USER_MASK|PFERR_WRITE_MASK);
>> +#endif
>> +
>
> Unneeded, I think.

Is it because translate_nested_gpa() always sets PFERR_USER_MASK?

>>  		if (unlikely(real_gfn == UNMAPPED_GVA))
>>  			goto error;
>>  		real_gfn = gpa_to_gfn(real_gfn);
>> @@ -221,6 +255,7 @@ retry_walk:
>>  			eperm = true;
>>  #endif
>>
>> +#if PTTYPE != EPT
>>  		if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
>>  			int ret;
>>  			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
>> @@ -235,7 +270,7 @@ retry_walk:
>>  			mark_page_dirty(vcpu->kvm, table_gfn);
>>  			pte |= PT_ACCESSED_MASK;
>>  		}
>> -
>> +#endif
>
> If PT_ACCESSED_MASK is 0 for EPT, this goes away without #ifdef.
>

True; a sketch of that idea is at the end of this mail.

>> +#if PTTYPE != EPT
>>  		/* check if the kernel is fetching from user page */
>>  		if (unlikely(pte_access & PT_USER_MASK) &&
>>  		    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
>>  			if (fetch_fault && !user_fault)
>>  				eperm = true;
>> -
>> +#endif
>
> Same here.
>
>
Orit
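P.S. To make the "mask is 0 for EPT" idea concrete, here is an untested
sketch. PT_GUEST_ACCESSED_MASK is a name I just made up; the same trick
would work for PT_USER_MASK in the SMEP check:

/* Per-PTTYPE accessed-bit mask, defined near the top of paging_tmpl.h. */
#if PTTYPE == EPT
#define PT_GUEST_ACCESSED_MASK 0	/* EPT has no accessed bit */
#else
#define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK
#endif

		/* In the walker the compiler then drops this whole block
		 * for EPT, because the leading mask is a constant zero,
		 * so no #ifdef is needed:
		 */
		if (PT_GUEST_ACCESSED_MASK &&
		    !eperm && unlikely(!(pte & PT_GUEST_ACCESSED_MASK))) {
			/* mark the gpte accessed, exactly as the code
			 * does today
			 */
		}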