Reviewed-by: Yan Zhao <yan.y.zhao@xxxxxxxxx> On Wed, Oct 19, 2022 at 04:56:15PM +0000, Sean Christopherson wrote: > Track the number of TDP MMU "shadow" pages instead of tracking the pages > themselves. With the NX huge page list manipulation moved out of the common > linking flow, elminating the list-based tracking means the happy path of > adding a shadow page doesn't need to acquire a spinlock and can instead > inc/dec an atomic. > > Keep the tracking as the WARN during TDP MMU teardown on leaked shadow > pages is very, very useful for detecting KVM bugs. > > Tracking the number of pages will also make it trivial to expose the > counter to userspace as a stat in the future, which may or may not be > desirable. > > Note, the TDP MMU needs to use a separate counter (and stat if that ever > comes to be) from the existing n_used_mmu_pages. The TDP MMU doesn't bother > supporting the shrinker nor does it honor KVM_SET_NR_MMU_PAGES (because the > TDP MMU consumes so few pages relative to shadow paging), and including TDP > MMU pages in that counter would break both the shrinker and shadow MMUs, > e.g. if a VM is using nested TDP. > > Cc: Yan Zhao <yan.y.zhao@xxxxxxxxx> > Reviewed-by: Mingwei Zhang <mizhang@xxxxxxxxxx> > Reviewed-by: David Matlack <dmatlack@xxxxxxxxxx> > Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx> > --- > arch/x86/include/asm/kvm_host.h | 11 +++-------- > arch/x86/kvm/mmu/tdp_mmu.c | 20 +++++++++----------- > 2 files changed, 12 insertions(+), 19 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 0333dbb8ec85..bbd2cecd34cb 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1290,6 +1290,9 @@ struct kvm_arch { > */ > bool tdp_mmu_enabled; > > + /* The number of TDP MMU pages across all roots. */ > + atomic64_t tdp_mmu_pages; > + > /* > * List of kvm_mmu_page structs being used as roots. > * All kvm_mmu_page structs in the list should have > @@ -1310,18 +1313,10 @@ struct kvm_arch { > */ > struct list_head tdp_mmu_roots; > > - /* > - * List of kvm_mmu_page structs not being used as roots. > - * All kvm_mmu_page structs in the list should have > - * tdp_mmu_page set and a tdp_mmu_root_count of 0. > - */ > - struct list_head tdp_mmu_pages; > - > /* > * Protects accesses to the following fields when the MMU lock > * is held in read mode: > * - tdp_mmu_roots (above) > - * - tdp_mmu_pages (above) > * - the link field of kvm_mmu_page structs used by the TDP MMU > * - possible_nx_huge_pages; > * - the possible_nx_huge_page_link field of kvm_mmu_page structs used > diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c > index 059231c82345..4e5b3ae824c1 100644 > --- a/arch/x86/kvm/mmu/tdp_mmu.c > +++ b/arch/x86/kvm/mmu/tdp_mmu.c > @@ -29,7 +29,6 @@ int kvm_mmu_init_tdp_mmu(struct kvm *kvm) > kvm->arch.tdp_mmu_enabled = true; > INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); > spin_lock_init(&kvm->arch.tdp_mmu_pages_lock); > - INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages); > kvm->arch.tdp_mmu_zap_wq = wq; > return 1; > } > @@ -54,7 +53,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) > /* Also waits for any queued work items. */ > destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); > > - WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages)); > + WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); > WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); > > /* > @@ -377,11 +376,13 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, > static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) > { > kvm_account_pgtable_pages((void *)sp->spt, +1); > + atomic64_inc(&kvm->arch.tdp_mmu_pages); > } > > static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) > { > kvm_account_pgtable_pages((void *)sp->spt, -1); > + atomic64_dec(&kvm->arch.tdp_mmu_pages); > } > > /** > @@ -397,17 +398,17 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp, > bool shared) > { > tdp_unaccount_mmu_page(kvm, sp); > + > + if (!sp->nx_huge_page_disallowed) > + return; > + > if (shared) > spin_lock(&kvm->arch.tdp_mmu_pages_lock); > else > lockdep_assert_held_write(&kvm->mmu_lock); > > - list_del(&sp->link); > - > - if (sp->nx_huge_page_disallowed) { > - sp->nx_huge_page_disallowed = false; > - untrack_possible_nx_huge_page(kvm, sp); > - } > + sp->nx_huge_page_disallowed = false; > + untrack_possible_nx_huge_page(kvm, sp); > > if (shared) > spin_unlock(&kvm->arch.tdp_mmu_pages_lock); > @@ -1140,9 +1141,6 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, > tdp_mmu_set_spte(kvm, iter, spte); > } > > - spin_lock(&kvm->arch.tdp_mmu_pages_lock); > - list_add(&sp->link, &kvm->arch.tdp_mmu_pages); > - spin_unlock(&kvm->arch.tdp_mmu_pages_lock); > tdp_account_mmu_page(kvm, sp); > > return 0; > -- > 2.38.0.413.g74048e4d9e-goog >