From: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>

FNAME(page_fault) verifies the PDPTE for nested NPT in PAE paging mode
because nested_svm_get_tdp_pdptr() reads the guest NPT's PDPTE from
memory unconditionally for each call.

The verification is complicated and it works only when mmu->pae_root
is always used when the guest is PAE paging.

Move the verification code into FNAME(fetch) and simplify it, since the
local shadow page is used and it can be walked in FNAME(fetch) and
unlinked from its children via drop_spte().

It also allows mmu->pae_root NOT to be used when it is NOT required to
be put in a 32bit CR3.

Signed-off-by: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>
---
 arch/x86/kvm/mmu/paging_tmpl.h | 72 ++++++++++++++++------------------
 1 file changed, 33 insertions(+), 39 deletions(-)

diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index cd6032e1947c..67c419bce1e5 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -659,6 +659,39 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 		clear_sp_write_flooding_count(it.sptep);
 		drop_large_spte(vcpu, it.sptep);
 
+		/*
+		 * When nested NPT is enabled and L1 is PAE paging,
+		 * mmu->get_pdptrs(), which is nested_svm_get_tdp_pdptr(),
+		 * reads the guest NPT's PDPTE from memory unconditionally
+		 * for each call.
+		 *
+		 * The guest PAE root page is not write-protected.
+		 *
+		 * The mmu->get_pdptrs() in FNAME(walk_addr_generic) might get
+		 * a value different from previous calls or different from the
+		 * return value of mmu->get_pdptrs() in mmu_alloc_shadow_roots().
+		 *
+		 * It would cause the following code to install the spte in a
+		 * wrong sp or to link a sp to a wrong parent if the return
+		 * value of mmu->get_pdptrs() is not verified unchanged, since
+		 * FNAME(gpte_changed) can't check this kind of change.
+		 *
+		 * Verify the return value of mmu->get_pdptrs() (only the gfn
+		 * in it needs to be checked) and drop the spte if the gfn
+		 * doesn't match.
+		 *
+		 * Do the verification unconditionally when the guest is PAE
+		 * paging, no matter whether it is nested NPT or not, to avoid
+		 * complicated code.
+		 */
+		if (vcpu->arch.mmu->cpu_role.base.level == PT32E_ROOT_LEVEL &&
+		    it.level == PT32E_ROOT_LEVEL &&
+		    is_shadow_present_pte(*it.sptep)) {
+			sp = to_shadow_page(*it.sptep & PT64_BASE_ADDR_MASK);
+			if (gw->table_gfn[it.level - 2] != sp->gfn)
+				drop_spte(vcpu->kvm, it.sptep);
+		}
+
 		sp = NULL;
 		if (!is_shadow_present_pte(*it.sptep)) {
 			table_gfn = gw->table_gfn[it.level - 2];
@@ -886,44 +919,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	if (is_page_fault_stale(vcpu, fault, mmu_seq))
 		goto out_unlock;
 
-	/*
-	 * When nested NPT enabled and L1 is PAE paging, mmu->get_pdptrs()
-	 * which is nested_svm_get_tdp_pdptr() reads the guest NPT's PDPTE
-	 * from memory unconditionally for each call.
-	 *
-	 * The guest PAE root page is not write-protected.
-	 *
-	 * The mmu->get_pdptrs() in FNAME(walk_addr_generic) might get a value
-	 * different from previous calls or different from the return value of
-	 * mmu->get_pdptrs() in mmu_alloc_shadow_roots().
-	 *
-	 * It will cause FNAME(fetch) installs the spte in a wrong sp or links
-	 * a sp to a wrong parent if the return value of mmu->get_pdptrs()
-	 * is not verified unchanged since FNAME(gpte_changed) can't check
-	 * this kind of change.
-	 *
-	 * Verify the return value of mmu->get_pdptrs() (only the gfn in it
-	 * needs to be checked) and do kvm_mmu_free_roots() like load_pdptr()
-	 * if the gfn isn't matched.
-	 *
-	 * Do the verifying unconditionally when the guest is PAE paging no
-	 * matter whether it is nested NPT or not to avoid complicated code.
-	 */
-	if (vcpu->arch.mmu->cpu_role.base.level == PT32E_ROOT_LEVEL) {
-		u64 pdpte = vcpu->arch.mmu->pae_root[(fault->addr >> 30) & 3];
-		struct kvm_mmu_page *sp = NULL;
-
-		if (IS_VALID_PAE_ROOT(pdpte))
-			sp = to_shadow_page(pdpte & PT64_BASE_ADDR_MASK);
-
-		if (!sp || walker.table_gfn[PT32E_ROOT_LEVEL - 2] != sp->gfn) {
-			write_unlock(&vcpu->kvm->mmu_lock);
-			kvm_mmu_free_roots(vcpu->kvm, vcpu->arch.mmu,
-					   KVM_MMU_ROOT_CURRENT);
-			goto release_clean;
-		}
-	}
-
 	r = make_mmu_pages_available(vcpu);
 	if (r)
 		goto out_unlock;
@@ -931,7 +926,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 
 out_unlock:
 	write_unlock(&vcpu->kvm->mmu_lock);
-release_clean:
 	kvm_release_pfn_clean(fault->pfn);
 	return r;
 }
-- 
2.19.1.6.gb485710b