Hi David,

On Mon, Aug 04, 2014 at 02:10:20PM -0700, David Matlack wrote:
>The following events can lead to an incorrect KVM_EXIT_MMIO bubbling
>up to userspace:
>
>(1) Guest accesses gpa X without a memory slot. The gfn is cached in
>struct kvm_vcpu_arch (mmio_gfn). On Intel EPT-enabled hosts, KVM sets
>the SPTE write-execute-noread so that future accesses cause
>EPT_MISCONFIGs.
>
>(2) Host userspace creates a memory slot via KVM_SET_USER_MEMORY_REGION
>covering the page just accessed.
>

One question: what triggers host userspace to create this memory slot?
Is it created just after the first mmio #PF?

Regards,
Wanpeng Li

>(3) Guest attempts to read or write to gpa X again. On Intel, this
>generates an EPT_MISCONFIG. The memory slot generation number that
>was incremented in (2) would normally take care of this but we fast
>path mmio faults through quickly_check_mmio_pf(), which only checks
>the per-vcpu mmio cache. Since we hit the cache, KVM passes a
>KVM_EXIT_MMIO up to userspace.
>
>This patch fixes the issue by doing the following:
> - Tag the mmio cache with the memslot generation and use it to
>   validate mmio cache lookups.
> - Extend vcpu_clear_mmio_info to clear mmio_gfn in addition to
>   mmio_gva, since both can be used to fast path mmio faults.
> - In mmu_sync_roots, unconditionally clear the mmio cache since
>   even direct_map (e.g. tdp) hosts use it.
>
>Signed-off-by: David Matlack <dmatlack@xxxxxxxxxx>
>---
>Changes in v2:
> - Use memslot generation to invalidate the mmio cache rather than
>   actively invalidating the cache.
> - Update patch description with new cache invalidation technique.
> - Pull mmio cache/clear code up out of x86.h and mmu.c and into
>   mmu.h.
>
> arch/x86/include/asm/kvm_host.h |  1 +
> arch/x86/kvm/mmu.c              | 16 ++--------
> arch/x86/kvm/mmu.h              | 70 +++++++++++++++++++++++++++++++++++++++++
> arch/x86/kvm/x86.h              | 36 ---------------------
> 4 files changed, 73 insertions(+), 50 deletions(-)
>
>diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>index 49205d0..f518d14 100644
>--- a/arch/x86/include/asm/kvm_host.h
>+++ b/arch/x86/include/asm/kvm_host.h
>@@ -479,6 +479,7 @@ struct kvm_vcpu_arch {
> 	u64 mmio_gva;
> 	unsigned access;
> 	gfn_t mmio_gfn;
>+	unsigned int mmio_gen;
>
> 	struct kvm_pmu pmu;
>
>diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>index 9314678..43f1c18 100644
>--- a/arch/x86/kvm/mmu.c
>+++ b/arch/x86/kvm/mmu.c
>@@ -206,11 +206,8 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
> #define MMIO_SPTE_GEN_LOW_SHIFT	3
> #define MMIO_SPTE_GEN_HIGH_SHIFT	52
>
>-#define MMIO_GEN_SHIFT			19
> #define MMIO_GEN_LOW_SHIFT		9
> #define MMIO_GEN_LOW_MASK		((1 << MMIO_GEN_LOW_SHIFT) - 1)
>-#define MMIO_GEN_MASK			((1 << MMIO_GEN_SHIFT) - 1)
>-#define MMIO_MAX_GEN			((1 << MMIO_GEN_SHIFT) - 1)
>
> static u64 generation_mmio_spte_mask(unsigned int gen)
> {
>@@ -234,16 +231,6 @@ static unsigned int get_mmio_spte_generation(u64 spte)
> 	return gen;
> }
>
>-static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
>-{
>-	/*
>-	 * Init kvm generation close to MMIO_MAX_GEN to easily test the
>-	 * code of handling generation number wrap-around.
>-	 */
>-	return (kvm_memslots(kvm)->generation +
>-			MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
>-}
>-
> static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
> 			   unsigned access)
> {
>@@ -3157,13 +3144,14 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
> 	int i;
> 	struct kvm_mmu_page *sp;
>
>+	vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
>+
> 	if (vcpu->arch.mmu.direct_map)
> 		return;
>
> 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
> 		return;
>
>-	vcpu_clear_mmio_info(vcpu, ~0ul);
> 	kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
> 	if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
> 		hpa_t root = vcpu->arch.mmu.root_hpa;
>diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
>index b982112..058651a 100644
>--- a/arch/x86/kvm/mmu.h
>+++ b/arch/x86/kvm/mmu.h
>@@ -82,6 +82,76 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
> void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> 		bool ept);
>
>+#define MMIO_GEN_SHIFT		19
>+#define MMIO_GEN_MASK		((1 << MMIO_GEN_SHIFT) - 1)
>+#define MMIO_MAX_GEN		((1 << MMIO_GEN_SHIFT) - 1)
>+static inline unsigned int kvm_current_mmio_generation(struct kvm *kvm)
>+{
>+	/*
>+	 * Init kvm generation close to MMIO_MAX_GEN to easily test the
>+	 * code of handling generation number wrap-around.
>+	 */
>+	return (kvm_memslots(kvm)->generation + MMIO_MAX_GEN - 150) &
>+		MMIO_GEN_MASK;
>+}
>+
>+static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
>+					gva_t gva, gfn_t gfn, unsigned access)
>+{
>+	vcpu->arch.mmio_gen = kvm_current_mmio_generation(vcpu->kvm);
>+
>+	/*
>+	 * Ensure that the mmio_gen is set before the rest of the cache entry.
>+	 * Otherwise we might see a new generation number attached to an old
>+	 * cache entry if creating/deleting a memslot races with mmio caching.
>+	 * The inverse case is possible (old generation number with new cache
>+	 * info), but that is safe. The next access will just miss the cache
>+	 * when it should have hit.
>+	 */
>+	smp_wmb();
>+
>+	vcpu->arch.mmio_gva = gva & PAGE_MASK;
>+	vcpu->arch.access = access;
>+	vcpu->arch.mmio_gfn = gfn;
>+}
>+
>+/*
>+ * Clear the mmio cache info for the given gva. If gva is MMIO_GVA_ANY,
>+ * unconditionally clear the mmio cache.
>+ */
>+#define MMIO_GVA_ANY		~((gva_t) 0)
>+static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
>+{
>+	if (gva != MMIO_GVA_ANY && vcpu->arch.mmio_gva != (gva & PAGE_MASK))
>+		return;
>+
>+	vcpu->arch.mmio_gva = 0;
>+	vcpu->arch.mmio_gfn = 0;
>+}
>+
>+static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu)
>+{
>+	return vcpu->arch.mmio_gen == kvm_current_mmio_generation(vcpu->kvm);
>+}
>+
>+static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
>+{
>+	u64 mmio_gva = vcpu->arch.mmio_gva;
>+
>+	return vcpu_match_mmio_gen(vcpu) &&
>+	       mmio_gva &&
>+	       mmio_gva == (gva & PAGE_MASK);
>+}
>+
>+static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
>+{
>+	gfn_t mmio_gfn = vcpu->arch.mmio_gfn;
>+
>+	return vcpu_match_mmio_gen(vcpu) &&
>+	       mmio_gfn &&
>+	       mmio_gfn == (gpa >> PAGE_SHIFT);
>+}
>+
> static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
> {
> 	if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
>diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
>index 8c97bac..c686c91 100644
>--- a/arch/x86/kvm/x86.h
>+++ b/arch/x86/kvm/x86.h
>@@ -72,42 +72,6 @@ static inline u32 bit(int bitno)
> 	return 1 << (bitno & 31);
> }
>
>-static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
>-					gva_t gva, gfn_t gfn, unsigned access)
>-{
>-	vcpu->arch.mmio_gva = gva & PAGE_MASK;
>-	vcpu->arch.access = access;
>-	vcpu->arch.mmio_gfn = gfn;
>-}
>-
>-/*
>- * Clear the mmio cache info for the given gva,
>- * specially, if gva is ~0ul, we clear all mmio cache info.
>- */
>-static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
>-{
>-	if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK))
>-		return;
>-
>-	vcpu->arch.mmio_gva = 0;
>-}
>-
>-static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
>-{
>-	if (vcpu->arch.mmio_gva && vcpu->arch.mmio_gva == (gva & PAGE_MASK))
>-		return true;
>-
>-	return false;
>-}
>-
>-static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
>-{
>-	if (vcpu->arch.mmio_gfn && vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT)
>-		return true;
>-
>-	return false;
>-}
>-
> void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
> void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
> int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
>--
>2.0.0.526.g5318336
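
As an aside for anyone following the thread, the core of the approach is
simply: stamp the per-vcpu cache entry with the memslot generation when
it is filled, and treat a stale stamp as a miss on lookup, so bumping the
generation invalidates every vcpu's cache at once. Below is a minimal
stand-alone sketch of that pattern; the names (mmio_cache,
memslot_generation, cache_fill, cache_hit) are invented for illustration
and this is not the kernel code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct mmio_cache {
	uint64_t gfn;         /* cached guest frame number, 0 = empty          */
	unsigned int gen;     /* memslot generation when the entry was filled  */
};

static unsigned int memslot_generation;  /* bumped whenever memslots change */

static void cache_fill(struct mmio_cache *c, uint64_t gfn)
{
	c->gen = memslot_generation;  /* tag the entry with the current generation */
	c->gfn = gfn;
}

static bool cache_hit(const struct mmio_cache *c, uint64_t gfn)
{
	/* A stale generation turns the entry into a miss without clearing it. */
	return c->gen == memslot_generation && c->gfn && c->gfn == gfn;
}

int main(void)
{
	struct mmio_cache c = { 0 };

	cache_fill(&c, 0x1234);        /* step (1): mmio fault result cached      */
	printf("hit before memslot change: %d\n", cache_hit(&c, 0x1234));  /* 1 */

	memslot_generation++;          /* step (2): memslot created or deleted    */
	printf("hit after memslot change:  %d\n", cache_hit(&c, 0x1234));  /* 0 */

	return 0;
}

This is why the v2 patch can rely on the generation check instead of
actively walking and clearing each vcpu's cache when a memslot changes.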