On 01/11/2013 01:26 AM, Marcelo Tosatti wrote: > On Tue, Jan 08, 2013 at 02:38:36PM +0800, Xiao Guangrong wrote: >> The current reexecute_instruction can not well detect the failed instruction >> emulation. It allows guest to retry all the instructions except it accesses >> on error pfn >> >> For example, some cases are nested-write-protect - if the page we want to >> write is used as PDE but it chains to itself. Under this case, we should >> stop the emulation and report the case to userspace >> >> Signed-off-by: Xiao Guangrong <xiaoguangrong@xxxxxxxxxxxxxxxxxx> >> --- >> arch/x86/include/asm/kvm_host.h | 7 +++++++ >> arch/x86/kvm/paging_tmpl.h | 27 ++++++++++++++++++++------- >> arch/x86/kvm/x86.c | 8 +++++++- >> 3 files changed, 34 insertions(+), 8 deletions(-) >> >> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >> index c431b33..d6ab8d2 100644 >> --- a/arch/x86/include/asm/kvm_host.h >> +++ b/arch/x86/include/asm/kvm_host.h >> @@ -502,6 +502,13 @@ struct kvm_vcpu_arch { >> u64 msr_val; >> struct gfn_to_hva_cache data; >> } pv_eoi; >> + >> + /* >> + * Indicate whether the access faults on its page table in guest >> + * which is set when fix page fault and used to detect unhandeable >> + * instruction. >> + */ >> + bool write_fault_to_shadow_pgtable; >> }; >> >> struct kvm_lpage_info { >> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h >> index 67b390d..df50560 100644 >> --- a/arch/x86/kvm/paging_tmpl.h >> +++ b/arch/x86/kvm/paging_tmpl.h >> @@ -497,26 +497,34 @@ out_gpte_changed: >> * created when kvm establishes shadow page table that stop kvm using large >> * page size. Do it early can avoid unnecessary #PF and emulation. >> * >> + * @write_fault_to_shadow_pgtable will return true if the fault gfn is >> + * currently used as its page table. >> + * >> * Note: the PDPT page table is not checked for PAE-32 bit guest. It is ok >> * since the PDPT is always shadowed, that means, we can not use large page >> * size to map the gfn which is used as PDPT. >> */ >> static bool >> FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, >> - struct guest_walker *walker, int user_fault) >> + struct guest_walker *walker, int user_fault, >> + bool *write_fault_to_shadow_pgtable) >> { >> int level; >> gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1); >> + bool self_changed = false; >> >> if (!(walker->pte_access & ACC_WRITE_MASK || >> (!is_write_protection(vcpu) && !user_fault))) >> return false; >> >> - for (level = walker->level; level <= walker->max_level; level++) >> - if (!((walker->gfn ^ walker->table_gfn[level - 1]) & mask)) >> - return true; >> + for (level = walker->level; level <= walker->max_level; level++) { >> + gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1]; >> + >> + self_changed |= !(gfn & mask); >> + *write_fault_to_shadow_pgtable |= !gfn; >> + } >> >> - return false; >> + return self_changed; >> } >> >> /* >> @@ -544,7 +552,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, >> int level = PT_PAGE_TABLE_LEVEL; >> int force_pt_level; >> unsigned long mmu_seq; >> - bool map_writable; >> + bool map_writable, is_self_change_mapping; >> >> pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); >> >> @@ -572,9 +580,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, >> return 0; >> } >> >> + vcpu->arch.write_fault_to_shadow_pgtable = false; >> + >> + is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, >> + &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); >> + >> if (walker.level >= PT_DIRECTORY_LEVEL) >> force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn) >> - || FNAME(is_self_change_mapping)(vcpu, &walker, user_fault); >> + || is_self_change_mapping; >> else >> force_pt_level = 1; >> if (!force_pt_level) { >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index 6f13e03..2957012 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -4810,7 +4810,13 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2) >> * guest to let CPU execute the instruction. >> */ >> kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); >> - return true; >> + >> + /* >> + * If the access faults on its page table, it can not >> + * be fixed by unprotecting shadow page and it should >> + * be reported to userspace. >> + */ >> + return !vcpu->arch.write_fault_to_shadow_pgtable; >> } > > This sounds wrong: only reporting emulation failure in case > of a write fault to shadow pagetable? We suppose unprotecting target-gfn can avoid emulation, the same as current code. :( > > The current pattern is sane: > > if (condition_1 which allows reexecution is true) > return true; > > if (condition_2 which allows reexecution is true) > return true; > ... > return false; Unfortunately, the current code reports failure only when the access fault on error pfn: pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); if (!is_error_pfn(pfn)) { kvm_release_pfn_clean(pfn); return true; } return false; All !is_rror_pfn returns true. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html