On Fri, May 08, 2020 at 11:24:25AM -0700, Jon Cargille wrote:
> From: Peter Feiner <pfeiner@xxxxxxxxxx>
>
> Optimization for avoiding lookups in mmu_page_hash. When there's a
> single direct root, a shadow page has at most one parent SPTE
> (non-root SPs have exactly one; the root has none). Thus, if an SPTE
> is non-present, it can be linked to a newly allocated SP without
> first checking if the SP already exists.

Some mechanical comments below. I'll think through the actual logic next
week; my brain needs to be primed anytime the MMU is involved :-)

> This optimization has proven significant in batch large SP shattering
> where the hash lookup accounted for 95% of the overhead.
>
> Signed-off-by: Peter Feiner <pfeiner@xxxxxxxxxx>
> Signed-off-by: Jon Cargille <jcargill@xxxxxxxxxx>
> Reviewed-by: Jim Mattson <jmattson@xxxxxxxxxx>
>
> ---
>  arch/x86/include/asm/kvm_host.h | 13 ++++++++
>  arch/x86/kvm/mmu/mmu.c          | 55 +++++++++++++++++++--------------
>  2 files changed, 45 insertions(+), 23 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index a239a297be33..9b70d764b626 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -913,6 +913,19 @@ struct kvm_arch {
>          struct kvm_page_track_notifier_node mmu_sp_tracker;
>          struct kvm_page_track_notifier_head track_notifier_head;
>
> +        /*
> +         * Optimization for avoiding lookups in mmu_page_hash. When there's a
> +         * single direct root, a shadow page has at most one parent SPTE
> +         * (non-root SPs have exactly one; the root has none). Thus, if an SPTE
> +         * is non-present, it can be linked to a newly allocated SP without
> +         * first checking if the SP already exists.
> +         *
> +         * False initially because there are no indirect roots.
> +         *
> +         * Guarded by mmu_lock.
> +         */
> +        bool shadow_page_may_have_multiple_parents;

Why make this a one-way bool? Wouldn't it be better to let this transition
back to '0' once all nested guests go away?

And maybe a shorter name that reflects what it tracks instead of how it's
used, e.g. has_indirect_mmu or indirect_mmu_count.

> +
>          struct list_head assigned_dev_head;
>          struct iommu_domain *iommu_domain;
>          bool iommu_noncoherent;
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index e618472c572b..d94552b0ed77 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -2499,35 +2499,40 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>                  quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
>                  role.quadrant = quadrant;
>          }
> -        for_each_valid_sp(vcpu->kvm, sp, gfn) {
> -                if (sp->gfn != gfn) {
> -                        collisions++;
> -                        continue;
> -                }
>
> -                if (!need_sync && sp->unsync)
> -                        need_sync = true;
> +        if (vcpu->kvm->arch.shadow_page_may_have_multiple_parents ||
> +            level == vcpu->arch.mmu->root_level) {

Might be worth a goto to preserve the for-loop.
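
E.g. something like this (completely untested, and "skip_hash_lookup" is just
a placeholder label) would skip the lookup without re-indenting the whole
loop body:

        /*
         * Inverted form of the check above: skip the hash lookup entirely
         * when the new SP can't have multiple parents and isn't the root.
         */
        if (!vcpu->kvm->arch.shadow_page_may_have_multiple_parents &&
            level != vcpu->arch.mmu->root_level)
                goto skip_hash_lookup;

        for_each_valid_sp(vcpu->kvm, sp, gfn) {
                ...
        }

skip_hash_lookup:
        ++vcpu->kvm->stat.mmu_cache_miss;
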
> +                for_each_valid_sp(vcpu->kvm, sp, gfn) {
> +                        if (sp->gfn != gfn) {
> +                                collisions++;
> +                                continue;
> +                        }
>
> -                if (sp->role.word != role.word)
> -                        continue;
> +                        if (!need_sync && sp->unsync)
> +                                need_sync = true;
>
> -                if (sp->unsync) {
> -                        /* The page is good, but __kvm_sync_page might still end
> -                         * up zapping it. If so, break in order to rebuild it.
> -                         */
> -                        if (!__kvm_sync_page(vcpu, sp, &invalid_list))
> -                                break;
> +                        if (sp->role.word != role.word)
> +                                continue;
>
> -                        WARN_ON(!list_empty(&invalid_list));
> -                        kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
> -                }
> +                        if (sp->unsync) {
> +                                /* The page is good, but __kvm_sync_page might
> +                                 * still end up zapping it. If so, break in
> +                                 * order to rebuild it.
> +                                 */
> +                                if (!__kvm_sync_page(vcpu, sp, &invalid_list))
> +                                        break;
>
> -                if (sp->unsync_children)
> -                        kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
> +                                WARN_ON(!list_empty(&invalid_list));
> +                                kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
> +                        }
>
> -                __clear_sp_write_flooding_count(sp);
> -                trace_kvm_mmu_get_page(sp, false);
> -                goto out;
> +                        if (sp->unsync_children)
> +                                kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
> +
> +                        __clear_sp_write_flooding_count(sp);
> +                        trace_kvm_mmu_get_page(sp, false);
> +                        goto out;
> +                }
>          }
>
>          ++vcpu->kvm->stat.mmu_cache_miss;
> @@ -3735,6 +3740,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
>          gfn_t root_gfn, root_pgd;
>          int i;
>
> +        spin_lock(&vcpu->kvm->mmu_lock);
> +        vcpu->kvm->arch.shadow_page_may_have_multiple_parents = true;
> +        spin_unlock(&vcpu->kvm->mmu_lock);

Taking the lock every time is unnecessary, even if this is changed to a
refcount type variable, e.g.

        if (!has_indirect_mmu) {
                lock_and_set
        }

or

        if (atomic_inc_return(&indirect_mmu_count) == 1)
                lock_and_unlock;

(Rough sketch of the atomic_inc_return() variant at the bottom of this mail.)

> +
>          root_pgd = vcpu->arch.mmu->get_guest_pgd(vcpu);
>          root_gfn = root_pgd >> PAGE_SHIFT;
>
> --
> 2.26.2.303.gf8c07b1a785-goog
>
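
For completeness, a rough sketch of the atomic_inc_return() variant mentioned
above. This is untested pseudo-C and assumes a hypothetical "atomic_t
indirect_mmu_count" in struct kvm_arch, with a matching atomic_dec() wherever
the last indirect root goes away; readers such as kvm_mmu_get_page() would
then key off atomic_read() instead of the bool:

        if (atomic_inc_return(&vcpu->kvm->arch.indirect_mmu_count) == 1) {
                /*
                 * Lock and immediately unlock mmu_lock on the 0->1 transition
                 * so that anything already walking shadow pages under
                 * mmu_lock finishes before the first indirect root is built,
                 * providing the same serialization as the unconditional
                 * lock+set+unlock above.
                 */
                spin_lock(&vcpu->kvm->mmu_lock);
                spin_unlock(&vcpu->kvm->mmu_lock);
        }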