From: Takuya Yoshikawa <yoshikawa.takuya@xxxxxxxxxxxxx>

The code has clearly suffered from over-inlining.  So make the body of
the walk loop a separate function: do_walk().  This will make it easier
to do further cleanups and optimizations later.

This was suggested by Ingo Molnar.

Cc: Ingo Molnar <mingo@xxxxxxx>
Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@xxxxxxxxxxxxx>
---
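For reference, the control-flow pattern used here reduces to a small,
self-contained sketch: the loop body becomes a function whose return
code tells the caller whether to keep walking, stop, restart the whole
walk, or fail, and the caller dispatches on that code in a switch.
The sketch below uses invented names (step(), struct toy_walker,
walk()); none of it is KVM code, it only models the dispatch.

#include <stdio.h>

enum { STEP_NEXT, STEP_DONE, STEP_RETRY, STEP_ERROR };

struct toy_walker {
	int level;	/* current depth, counts down like walker->level */
	int retries;	/* how many times the walk was restarted */
};

/* One iteration of the walk; all loop-local state stays in here. */
static int step(struct toy_walker *w)
{
	if (w->level < 0)
		return STEP_ERROR;	/* corresponds to goto error */
	if (w->retries == 0 && w->level == 2)
		return STEP_RETRY;	/* e.g. lost a cmpxchg race */
	if (w->level == 0)
		return STEP_DONE;	/* reached the leaf */
	return STEP_NEXT;		/* descend one more level */
}

static int walk(struct toy_walker *w)
{
retry:
	w->level = 4;		/* like walker->level = mmu->root_level */
	for (;;) {
		switch (step(w)) {
		case STEP_NEXT:
			break;
		case STEP_DONE:
			return 1;
		case STEP_RETRY:
			w->retries++;
			goto retry;	/* mirrors goto walk_retry */
		case STEP_ERROR:
			return 0;	/* mirrors goto error */
		}
		w->level--;
	}
}

int main(void)
{
	struct toy_walker w = { 0, 0 };

	printf("walk %s after %d retry\n",
	       walk(&w) ? "succeeded" : "failed", w.retries);
	return 0;
}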
 arch/x86/kvm/mmu.c         |   21 ++++
 arch/x86/kvm/paging_tmpl.h |  227 ++++++++++++++++++++++++--------------------
 2 files changed, 145 insertions(+), 103 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2d14434..16ccf4b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -69,6 +69,27 @@ char *audit_point_name[] = {
 	"post sync"
 };
 
+/*
+ * do_walk() returns one of these.
+ *
+ * WALK_NEXT: Continue the walk loop.
+ * WALK_DONE: Break from the walk loop.
+ * WALK_RETRY: Retry walk.
+ * WALK_NOT_PRESENT: Set PFERR_PRESENT_MASK and goto error.
+ * WALK_RSVD_FAULT: Set PFERR_RSVD_MASK and goto error.
+ * WALK_ERROR: Goto error.
+ * WALK_ABORT: Return immediately.
+ */
+enum {
+	WALK_NEXT,
+	WALK_DONE,
+	WALK_RETRY,
+	WALK_NOT_PRESENT,
+	WALK_RSVD_FAULT,
+	WALK_ERROR,
+	WALK_ABORT
+};
+
 #undef MMU_DEBUG
 
 #ifdef MMU_DEBUG
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 711336b..4913aa5 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -114,6 +114,111 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
 }
 
 /*
+ * Walk one level.
+ * Guest pte and its user address will be put in *pte and *ptep_user.
+ */
+static inline int
+FNAME(do_walk)(struct guest_walker *walker, struct kvm_vcpu *vcpu,
+	       struct kvm_mmu *mmu, gva_t addr, u32 access, bool *eperm,
+	       pt_element_t *pte, pt_element_t __user **ptep_user)
+{
+	gfn_t real_gfn;
+	unsigned long host_addr;
+	unsigned index = PT_INDEX(addr, walker->level);
+	int offset = index * sizeof(pt_element_t);
+	gfn_t table_gfn = gpte_to_gfn(*pte);
+	gpa_t pte_gpa = gfn_to_gpa(table_gfn) + offset;
+	const int write_fault = access & PFERR_WRITE_MASK;
+	const int user_fault = access & PFERR_USER_MASK;
+	const int fetch_fault = access & PFERR_FETCH_MASK;
+
+	walker->table_gfn[walker->level - 1] = table_gfn;
+	walker->pte_gpa[walker->level - 1] = pte_gpa;
+
+	real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
+				      PFERR_USER_MASK|PFERR_WRITE_MASK);
+	if (unlikely(real_gfn == UNMAPPED_GVA))
+		return WALK_NOT_PRESENT;
+	real_gfn = gpa_to_gfn(real_gfn);
+
+	host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
+	if (unlikely(kvm_is_error_hva(host_addr)))
+		return WALK_NOT_PRESENT;
+
+	*ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
+	if (unlikely(__copy_from_user(pte, *ptep_user, sizeof(*pte))))
+		return WALK_NOT_PRESENT;
+
+	trace_kvm_mmu_paging_element(*pte, walker->level);
+
+	if (unlikely(!is_present_gpte(*pte)))
+		return WALK_NOT_PRESENT;
+
+	if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, *pte, walker->level)))
+		return WALK_RSVD_FAULT;
+
+	if (unlikely(write_fault && !is_writable_pte(*pte)
+		     && (user_fault || is_write_protection(vcpu))))
+		*eperm = true;
+
+	if (unlikely(user_fault && !(*pte & PT_USER_MASK)))
+		*eperm = true;
+
+#if PTTYPE == 64
+	if (unlikely(fetch_fault && (*pte & PT64_NX_MASK)))
+		*eperm = true;
+#endif
+
+	if (!*eperm && unlikely(!(*pte & PT_ACCESSED_MASK))) {
+		int ret;
+
+		trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(*pte));
+		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, *ptep_user, index,
+					  *pte, *pte|PT_ACCESSED_MASK);
+		if (unlikely(ret < 0))
+			return WALK_NOT_PRESENT;
+		else if (ret)
+			return WALK_RETRY;
+
+		mark_page_dirty(vcpu->kvm, table_gfn);
+		*pte |= PT_ACCESSED_MASK;
+	}
+
+	walker->pte_access = walker->pt_access & FNAME(gpte_access)(vcpu, *pte);
+
+	walker->ptes[walker->level - 1] = *pte;
+
+	if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
+	    ((walker->level == PT_DIRECTORY_LEVEL) && is_large_pte(*pte) &&
+	     (PTTYPE == 64 || is_pse(vcpu))) ||
+	    ((walker->level == PT_PDPE_LEVEL) && is_large_pte(*pte) &&
+	     (mmu->root_level == PT64_ROOT_LEVEL))) {
+		gpa_t real_gpa;
+		gfn_t gfn;
+		u32 ac;
+
+		gfn = gpte_to_gfn_lvl(*pte, walker->level);
+		gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT;
+
+		if (PTTYPE == 32 && (walker->level == PT_DIRECTORY_LEVEL) &&
+		    is_cpuid_PSE36())
+			gfn += pse36_gfn_delta(*pte);
+
+		ac = write_fault | fetch_fault | user_fault;
+
+		real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), ac);
+		if (real_gpa == UNMAPPED_GVA)
+			return WALK_ABORT;
+
+		walker->gfn = real_gpa >> PAGE_SHIFT;
+
+		return WALK_DONE;
+	}
+
+	return WALK_NEXT;
+}
+
+/*
  * Fetch a guest pte for a guest virtual address
  */
 static int FNAME(walk_addr_generic)(struct guest_walker *walker,
@@ -130,7 +235,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
 				     fetch_fault);
 
-walk:
+walk_retry:
 	eperm = false;
 	walker->level = mmu->root_level;
 	pte = mmu->get_cr3(vcpu);
@@ -152,118 +257,34 @@ walk:
 	walker->pt_access = ACC_ALL;
 
 	for (;;) {
-		gfn_t real_gfn;
-		unsigned long host_addr;
-		unsigned index = PT_INDEX(addr, walker->level);
-		int offset = index * sizeof(pt_element_t);
-		gfn_t table_gfn = gpte_to_gfn(pte);
-		gpa_t pte_gpa = gfn_to_gpa(table_gfn) + offset;
-
-		walker->table_gfn[walker->level - 1] = table_gfn;
-		walker->pte_gpa[walker->level - 1] = pte_gpa;
-
-		real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
-					      PFERR_USER_MASK|PFERR_WRITE_MASK);
-		if (unlikely(real_gfn == UNMAPPED_GVA)) {
-			errcode |= PFERR_PRESENT_MASK;
-			goto error;
-		}
-		real_gfn = gpa_to_gfn(real_gfn);
-
-		host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
-		if (unlikely(kvm_is_error_hva(host_addr))) {
-			errcode |= PFERR_PRESENT_MASK;
-			goto error;
-		}
-
-		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
-		if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) {
-			errcode |= PFERR_PRESENT_MASK;
-			goto error;
-		}
-
-		trace_kvm_mmu_paging_element(pte, walker->level);
+		int ret;
 
-		if (unlikely(!is_present_gpte(pte))) {
+		ret = FNAME(do_walk)(walker, vcpu, mmu, addr, access,
+				     &eperm, &pte, &ptep_user);
+		switch (ret) {
+		case WALK_NEXT:
+			break;
+		case WALK_DONE:
+			goto walk_done;
+		case WALK_RETRY:
+			goto walk_retry;
+		case WALK_NOT_PRESENT:
 			errcode |= PFERR_PRESENT_MASK;
 			goto error;
-		}
-
-		if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte,
-					      walker->level))) {
+		case WALK_RSVD_FAULT:
 			errcode |= PFERR_RSVD_MASK;
 			goto error;
-		}
-
-		if (unlikely(write_fault && !is_writable_pte(pte)
-			     && (user_fault || is_write_protection(vcpu))))
-			eperm = true;
-
-		if (unlikely(user_fault && !(pte & PT_USER_MASK)))
-			eperm = true;
-
-#if PTTYPE == 64
-		if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
-			eperm = true;
-#endif
-
-		if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
-			int ret;
-			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
-						       sizeof(pte));
-			ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
-						  pte, pte|PT_ACCESSED_MASK);
-			if (unlikely(ret < 0)) {
-				errcode |= PFERR_PRESENT_MASK;
-				goto error;
-			} else if (ret)
-				goto walk;
-
-			mark_page_dirty(vcpu->kvm, table_gfn);
-			pte |= PT_ACCESSED_MASK;
-		}
-
-		walker->pte_access = walker->pt_access &
-				     FNAME(gpte_access)(vcpu, pte);
-
-		walker->ptes[walker->level - 1] = pte;
-
-		if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
-		    ((walker->level == PT_DIRECTORY_LEVEL) &&
-				is_large_pte(pte) &&
-				(PTTYPE == 64 || is_pse(vcpu))) ||
-		    ((walker->level == PT_PDPE_LEVEL) &&
-				is_large_pte(pte) &&
-				mmu->root_level == PT64_ROOT_LEVEL)) {
-			int lvl = walker->level;
-			gpa_t real_gpa;
-			gfn_t gfn;
-			u32 ac;
-
-			gfn = gpte_to_gfn_lvl(pte, lvl);
-			gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
-
-			if (PTTYPE == 32 &&
-			    walker->level == PT_DIRECTORY_LEVEL &&
-			    is_cpuid_PSE36())
-				gfn += pse36_gfn_delta(pte);
-
-			ac = write_fault | fetch_fault | user_fault;
-
-			real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn),
-						      ac);
-			if (real_gpa == UNMAPPED_GVA)
-				return 0;
-
-			walker->gfn = real_gpa >> PAGE_SHIFT;
-
-			break;
+		case WALK_ERROR:
+			goto error;
+		case WALK_ABORT:
+			return 0;
 		}
 
 		walker->pt_access = walker->pte_access;
 		--walker->level;
 	}
 
+walk_done:
 	if (unlikely(eperm))
 		goto error;
 
@@ -279,7 +300,7 @@ walk:
 			errcode |= PFERR_PRESENT_MASK;
 			goto error;
 		} else if (ret)
-			goto walk;
+			goto walk_retry;
 
 		mark_page_dirty(vcpu->kvm, table_gfn);
 		pte |= PT_DIRTY_MASK;
-- 
1.7.4.1
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html