From: Peter Feiner <pfeiner@xxxxxxxxxx>

Optimization for avoiding lookups in mmu_page_hash. When there's a
single direct root, a shadow page has at most one parent SPTE (non-root
SPs have exactly one; the root has none). Thus, if an SPTE is
non-present, it can be linked to a newly allocated SP without first
checking if the SP already exists.

This optimization has proven significant in batch large SP shattering,
where the hash lookup accounted for 95% of the overhead.

Signed-off-by: Peter Feiner <pfeiner@xxxxxxxxxx>
Signed-off-by: Jon Cargille <jcargill@xxxxxxxxxx>
Reviewed-by: Jim Mattson <jmattson@xxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h | 13 ++++++++
 arch/x86/kvm/mmu/mmu.c          | 55 +++++++++++++++++++--------------
 2 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a239a297be33..9b70d764b626 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -913,6 +913,19 @@ struct kvm_arch {
 	struct kvm_page_track_notifier_node mmu_sp_tracker;
 	struct kvm_page_track_notifier_head track_notifier_head;
 
+	/*
+	 * Optimization for avoiding lookups in mmu_page_hash. When there's a
+	 * single direct root, a shadow page has at most one parent SPTE
+	 * (non-root SPs have exactly one; the root has none). Thus, if an SPTE
+	 * is non-present, it can be linked to a newly allocated SP without
+	 * first checking if the SP already exists.
+	 *
+	 * False initially because there are no indirect roots.
+	 *
+	 * Guarded by mmu_lock.
+	 */
+	bool shadow_page_may_have_multiple_parents;
+
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
 	bool iommu_noncoherent;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e618472c572b..d94552b0ed77 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2499,35 +2499,40 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
 		role.quadrant = quadrant;
 	}
-	for_each_valid_sp(vcpu->kvm, sp, gfn) {
-		if (sp->gfn != gfn) {
-			collisions++;
-			continue;
-		}
 
-		if (!need_sync && sp->unsync)
-			need_sync = true;
+	if (vcpu->kvm->arch.shadow_page_may_have_multiple_parents ||
+	    level == vcpu->arch.mmu->root_level) {
+		for_each_valid_sp(vcpu->kvm, sp, gfn) {
+			if (sp->gfn != gfn) {
+				collisions++;
+				continue;
+			}
 
-		if (sp->role.word != role.word)
-			continue;
+			if (!need_sync && sp->unsync)
+				need_sync = true;
 
-		if (sp->unsync) {
-			/* The page is good, but __kvm_sync_page might still end
-			 * up zapping it.  If so, break in order to rebuild it.
-			 */
-			if (!__kvm_sync_page(vcpu, sp, &invalid_list))
-				break;
+			if (sp->role.word != role.word)
+				continue;
 
-			WARN_ON(!list_empty(&invalid_list));
-			kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-		}
+			if (sp->unsync) {
+				/* The page is good, but __kvm_sync_page might
+				 * still end up zapping it. If so, break in
+				 * order to rebuild it.
+				 */
+				if (!__kvm_sync_page(vcpu, sp, &invalid_list))
+					break;
 
-		if (sp->unsync_children)
-			kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+				WARN_ON(!list_empty(&invalid_list));
+				kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+			}
 
-		__clear_sp_write_flooding_count(sp);
-		trace_kvm_mmu_get_page(sp, false);
-		goto out;
+			if (sp->unsync_children)
+				kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+
+			__clear_sp_write_flooding_count(sp);
+			trace_kvm_mmu_get_page(sp, false);
+			goto out;
+		}
 	}
 
 	++vcpu->kvm->stat.mmu_cache_miss;
@@ -3735,6 +3740,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	gfn_t root_gfn, root_pgd;
 	int i;
 
+	spin_lock(&vcpu->kvm->mmu_lock);
+	vcpu->kvm->arch.shadow_page_may_have_multiple_parents = true;
+	spin_unlock(&vcpu->kvm->mmu_lock);
+
 	root_pgd = vcpu->arch.mmu->get_guest_pgd(vcpu);
 	root_gfn = root_pgd >> PAGE_SHIFT;
-- 
2.26.2.303.gf8c07b1a785-goog
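For readers tracing the reasoning outside the kernel tree, below is a
minimal standalone C sketch of the fast path this patch adds. Every name
in it (struct vm, struct sp, hash_lookup, alloc_sp, find_or_create_sp,
is_root) is a simplified stand-in invented for illustration, not KVM's
real structures or API. It models only the gating idea from the commit
message: while no shadow page can have more than one parent SPTE, the
hash lookup is redundant for a non-root page reached through a
non-present SPTE, so a fresh SP can be allocated and linked directly.

#include <stdbool.h>
#include <stdlib.h>

#define HASH_BUCKETS 64

struct sp {
	unsigned long gfn;
	struct sp *hash_next;		/* chain within a hash bucket */
};

struct vm {
	struct sp *page_hash[HASH_BUCKETS];
	/* Set once an indirect root exists; SPs may then share parents. */
	bool shadow_page_may_have_multiple_parents;
};

static struct sp *hash_lookup(struct vm *vm, unsigned long gfn)
{
	struct sp *sp;

	for (sp = vm->page_hash[gfn % HASH_BUCKETS]; sp; sp = sp->hash_next)
		if (sp->gfn == gfn)
			return sp;
	return NULL;
}

static struct sp *alloc_sp(struct vm *vm, unsigned long gfn)
{
	struct sp *sp = calloc(1, sizeof(*sp));

	/* New SPs are always hashed, so later lookups can find them. */
	sp->gfn = gfn;
	sp->hash_next = vm->page_hash[gfn % HASH_BUCKETS];
	vm->page_hash[gfn % HASH_BUCKETS] = sp;
	return sp;
}

/*
 * Analog of the kvm_mmu_get_page() change: with a single direct root, a
 * non-root SP has exactly one parent SPTE, so a caller arriving via a
 * non-present SPTE cannot be racing with an existing SP -- skip the
 * lookup and allocate directly.
 */
static struct sp *find_or_create_sp(struct vm *vm, unsigned long gfn,
				    bool is_root)
{
	struct sp *sp;

	if (vm->shadow_page_may_have_multiple_parents || is_root) {
		sp = hash_lookup(vm, gfn);
		if (sp)
			return sp;
	}
	return alloc_sp(vm, gfn);
}

int main(void)
{
	struct vm vm = {0};

	/* Single direct root: creation skips the hash lookup. */
	struct sp *a = find_or_create_sp(&vm, 42, false);

	/* Once an indirect root appears, lookups resume and dedupe. */
	vm.shadow_page_may_have_multiple_parents = true;
	struct sp *b = find_or_create_sp(&vm, 42, false);

	return a == b ? 0 : 1;
}

As in the patch, roots still take the lookup path (mirroring the
level == vcpu->arch.mmu->root_level check) because a root is not reached
through a parent SPTE, so the non-present-SPTE argument never applies
to it.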