On 11.08.2017 18:52, Paolo Bonzini wrote: > There is currently some confusion between nested and L1 GPAs. The > assignment to "direct" in kvm_mmu_page_fault tries to fix that, but > it is not enough. What this patch does is fence off the MMIO cache > completely when using shadow nested page tables, since we have neither > a GVA nor an L1 GPA to put in the cache. This also allows some > simplifications in kvm_mmu_page_fault and FNAME(page_fault). > > The EPT misconfig likewise does not have an L1 GPA to pass to > kvm_io_bus_write, so that must be skipped for guest mode. The complexity of the MMU and non-trivial corner cases like this one scare me every time :) > > Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> > --- > arch/x86/kvm/mmu.c | 10 +++++++++- > arch/x86/kvm/paging_tmpl.h | 3 +-- > arch/x86/kvm/vmx.c | 12 +++++++++--- > arch/x86/kvm/x86.h | 6 +++++- > 4 files changed, 24 insertions(+), 7 deletions(-) > > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > index f5c3f8e7d29f..f3665947bcc5 100644 > --- a/arch/x86/kvm/mmu.c > +++ b/arch/x86/kvm/mmu.c > @@ -3598,6 +3598,14 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level) > > static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) > { > + /* > + * A nested guest cannot use the MMIO cache if it is using nested > + * page tables, because cr2 is a nGPA while the cache stores L1's > + * physical addresses. 
> + */ > + if (mmu_is_nested(vcpu)) > + return false; > + > if (direct) > return vcpu_match_mmio_gpa(vcpu, addr); > > @@ -4827,7 +4835,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, > { > int r, emulation_type = EMULTYPE_RETRY; > enum emulation_result er; > - bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu); > + bool direct = vcpu->arch.mmu.direct_map; > > /* > * With shadow page tables, fault_address contains a GVA > diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h > index 3bb90ceeb52d..86b68dc5a649 100644 > --- a/arch/x86/kvm/paging_tmpl.h > +++ b/arch/x86/kvm/paging_tmpl.h > @@ -790,8 +790,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, > &map_writable)) > return 0; > > - if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr, > - walker.gfn, pfn, walker.pte_access, &r)) > + if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r)) > return r; > > /* > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 79efb00dd70d..e3989461f938 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -6402,10 +6402,16 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) > int ret; > gpa_t gpa; > > + /* > + * A nested guest cannot optimize MMIO vmexits, because we have an > + * nGPA here instead of the required GPA. > + */ > gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); > - if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { > - trace_kvm_fast_mmio(gpa); > - return kvm_skip_emulated_instruction(vcpu); > + if (!is_guest_mode(vcpu)) { > + if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { if (!is_guest_mode(vcpu) && !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL) ... could be done instead, so that the following lines would not need to change. 
> + trace_kvm_fast_mmio(gpa); > + return kvm_skip_emulated_instruction(vcpu); > + } > } > > ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); > diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h > index 612067074905..2383d2ce0a84 100644 > --- a/arch/x86/kvm/x86.h > +++ b/arch/x86/kvm/x86.h > @@ -90,7 +90,11 @@ static inline u32 bit(int bitno) > static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, > gva_t gva, gfn_t gfn, unsigned access) > { > - vcpu->arch.mmio_gva = gva & PAGE_MASK; > + /* > + * If this is a shadow nested page table, the "GVA" is > + * actually a nested GPA. nGPA? (to stick to the terminology used in the other comments of this patch) > + */ > + vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK; > vcpu->arch.access = access; > vcpu->arch.mmio_gfn = gfn; > vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; > -- Thanks, David