From: David Matlack <dmatlack@xxxxxxxxxx> The memslot for the faulting gfn is used throughout the page fault handling code, so capture it in kvm_page_fault as soon as we know the gfn and use it in the page fault handling code that has direct access to the kvm_page_fault struct. Replace various tests using is_noslot_pfn with more direct tests on fault->slot being NULL. This, in combination with the subsequent patch, improves "Populate memory time" in dirty_log_perf_test by 5% when using the legacy MMU. There is no discerable improvement to the performance of the TDP MMU. No functional change intended. Suggested-by: Ben Gardon <bgardon@xxxxxxxxxx> Signed-off-by: David Matlack <dmatlack@xxxxxxxxxx> Message-Id: <20210813203504.2742757-4-dmatlack@xxxxxxxxxx> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> --- arch/x86/kvm/mmu.h | 3 +++ arch/x86/kvm/mmu/mmu.c | 32 ++++++++++++-------------------- arch/x86/kvm/mmu/paging_tmpl.h | 6 ++++-- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 4 files changed, 20 insertions(+), 23 deletions(-) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 01a4d1bc5053..75367af1a6d3 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -158,6 +158,9 @@ struct kvm_page_fault { /* Shifted addr, or result of guest page table walk if addr is a gva. */ gfn_t gfn; + /* The memslot containing gfn. May be NULL. */ + struct kvm_memory_slot *slot; + /* Outputs of kvm_faultin_pfn. */ kvm_pfn_t pfn; hva_t hva; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 5a757953b98b..754578458cb7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2907,7 +2907,7 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm, void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { - struct kvm_memory_slot *slot; + struct kvm_memory_slot *slot = fault->slot; kvm_pfn_t mask; fault->huge_page_disallowed = fault->exec && fault->nx_huge_page_workaround_enabled; @@ -2918,8 +2918,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (is_error_noslot_pfn(fault->pfn) || kvm_is_reserved_pfn(fault->pfn)) return; - slot = gfn_to_memslot_dirty_bitmap(vcpu, fault->gfn, true); - if (!slot) + if (kvm_slot_dirty_track_enabled(slot)) return; /* @@ -3043,7 +3042,7 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa return true; } - if (unlikely(is_noslot_pfn(fault->pfn))) { + if (unlikely(!fault->slot)) { gva_t gva = fault->is_tdp ? 0 : fault->addr; vcpu_cache_mmio_info(vcpu, gva, fault->gfn, @@ -3097,13 +3096,9 @@ static bool page_fault_can_be_fast(struct kvm_page_fault *fault) * someone else modified the SPTE from its original value. */ static bool -fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, +fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, u64 *sptep, u64 old_spte, u64 new_spte) { - gfn_t gfn; - - WARN_ON(!sp->role.direct); - /* * Theoretically we could also set dirty bit (and flush TLB) here in * order to eliminate unnecessary PML logging. See comments in @@ -3119,14 +3114,8 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, if (cmpxchg64(sptep, old_spte, new_spte) != old_spte) return false; - if (is_writable_pte(new_spte) && !is_writable_pte(old_spte)) { - /* - * The gfn of direct spte is stable since it is - * calculated by sp->gfn. - */ - gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); - kvm_vcpu_mark_page_dirty(vcpu, gfn); - } + if (is_writable_pte(new_spte) && !is_writable_pte(old_spte)) + mark_page_dirty_in_slot(vcpu->kvm, fault->slot, fault->gfn); return true; } @@ -3251,7 +3240,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) * since the gfn is not stable for indirect shadow page. See * Documentation/virt/kvm/locking.rst to get more detail. */ - if (fast_pf_fix_direct_spte(vcpu, sp, sptep, spte, new_spte)) { + if (fast_pf_fix_direct_spte(vcpu, fault, sptep, spte, new_spte)) { ret = RET_PF_FIXED; break; } @@ -3863,7 +3852,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int *r) { - struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn); + struct kvm_memory_slot *slot = fault->slot; bool async; /* @@ -3877,6 +3866,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, if (!kvm_is_visible_memslot(slot)) { /* Don't expose private memslots to L2. */ if (is_guest_mode(vcpu)) { + fault->slot = NULL; fault->pfn = KVM_PFN_NOSLOT; fault->map_writable = false; return false; @@ -3928,6 +3918,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault int r; fault->gfn = fault->addr >> PAGE_SHIFT; + fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn); + if (page_fault_handle_page_track(vcpu, fault)) return RET_PF_EMULATE; @@ -3955,7 +3947,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault else write_lock(&vcpu->kvm->mmu_lock); - if (!is_noslot_pfn(fault->pfn) && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva)) + if (fault->slot && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva)) goto out_unlock; r = make_mmu_pages_available(vcpu); if (r) diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 7a8a2d14a3c7..e4c7bf3deac8 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -861,6 +861,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault } fault->gfn = walker.gfn; + fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn); + if (page_fault_handle_page_track(vcpu, fault)) { shadow_page_table_clear_flood(vcpu, fault->addr); return RET_PF_EMULATE; @@ -894,7 +896,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault * we will cache the incorrect access into mmio spte. */ if (fault->write && !(walker.pte_access & ACC_WRITE_MASK) && - !is_cr0_wp(vcpu->arch.mmu) && !fault->user && !is_noslot_pfn(fault->pfn)) { + !is_cr0_wp(vcpu->arch.mmu) && !fault->user && fault->slot) { walker.pte_access |= ACC_WRITE_MASK; walker.pte_access &= ~ACC_USER_MASK; @@ -910,7 +912,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault r = RET_PF_RETRY; write_lock(&vcpu->kvm->mmu_lock); - if (!is_noslot_pfn(fault->pfn) && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva)) + if (fault->slot && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva)) goto out_unlock; kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 2d92a5b54ded..3e10658cf0d7 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -900,7 +900,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int ret = RET_PF_FIXED; int make_spte_ret = 0; - if (unlikely(is_noslot_pfn(fault->pfn))) + if (unlikely(!fault->slot)) new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL); else make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn, -- 2.27.0