The current reexecute_instruction cannot reliably detect a failed instruction emulation. It allows the guest to retry every instruction except one that accesses an error pfn. One example is nested write-protection: the page we want to write is used as a PDE, but it chains to itself. In this case, we should stop the emulation and report it to userspace. Signed-off-by: Xiao Guangrong <xiaoguangrong@xxxxxxxxxxxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 2 + arch/x86/kvm/paging_tmpl.h | 2 + arch/x86/kvm/x86.c | 54 ++++++++++++++++++++++++++++----------- 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b2e11f4..c5eb52f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -566,6 +566,8 @@ struct kvm_arch { u64 hv_guest_os_id; u64 hv_hypercall; + /* synchronizing reexecute_instruction and page fault path. */ + u64 page_fault_count; #ifdef CONFIG_KVM_MMU_AUDIT int audit_point; #endif diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 891eb6d..d55ad89 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -568,6 +568,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; + vcpu->kvm->arch.page_fault_count++; + kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); kvm_mmu_free_some_pages(vcpu); if (!force_pt_level) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5fe72cc..2fe484b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4473,37 +4473,61 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2) { gpa_t gpa = cr2; pfn_t pfn; - - if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) - return false; + u64 page_fault_count; + int emulate; if (!vcpu->arch.mmu.direct_map) { gpa = kvm_mmu_gva_to_gpa_read(vcpu, cr2, NULL); + /* + * If the mapping is invalid in guest, let cpu
retry + * it to generate fault. + */ if (gpa == UNMAPPED_GVA) - return true; /* let cpu generate fault */ + return true; } /* - * if emulation was due to access to shadowed page table - * and it failed try to unshadow page and re-enter the - * guest to let CPU execute the instruction. - */ - if (kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa))) - return true; - - /* * Do not retry the unhandleable instruction if it faults on the * readonly host memory, otherwise it will goto a infinite loop: * retry instruction -> write #PF -> emulation fail -> retry * instruction -> ... */ pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); - if (!is_error_noslot_pfn(pfn)) { - kvm_release_pfn_clean(pfn); + + /* + * If the instruction failed on the error pfn, it can not be fixed, + * report the error to userspace. + */ + if (is_error_noslot_pfn(pfn)) + return false; + + kvm_release_pfn_clean(pfn); + + /* The instructions are well-emulated on direct mmu. */ + if (vcpu->arch.mmu.direct_map) { + if (ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); + return true; } - return false; +again: + page_fault_count = ACCESS_ONCE(vcpu->kvm->arch.page_fault_count); + + /* + * if emulation was due to access to shadowed page table + * and it failed try to unshadow page and re-enter the + * guest to let CPU execute the instruction. + */ + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); + emulate = vcpu->arch.mmu.page_fault(vcpu, cr2, PFERR_WRITE_MASK, false); + + /* The page fault path called above can increase the count. */ + if (page_fault_count + 1 != + ACCESS_ONCE(vcpu->kvm->arch.page_fault_count)) + goto again; + + return !emulate; } static bool retry_instruction(struct x86_emulate_ctxt *ctxt, -- 1.7.7.6 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html