Reviewed-by: Yan Zhao <yan.y.zhao@xxxxxxxxx> On Wed, Oct 19, 2022 at 04:56:14PM +0000, Sean Christopherson wrote: > Set nx_huge_page_disallowed in TDP MMU shadow pages before making the SP > visible to other readers, i.e. before setting its SPTE. This will allow > KVM to query the flag when determining if a shadow page can be replaced > by an NX huge page without violating the rules of the mitigation. > > Note, the shadow/legacy MMU holds mmu_lock for write, so it's impossible > for another CPU to see a shadow page without an up-to-date > nx_huge_page_disallowed, i.e. only the TDP MMU needs the complicated > dance. > > Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx> > Reviewed-by: David Matlack <dmatlack@xxxxxxxxxx> > --- > arch/x86/kvm/mmu/mmu.c | 28 +++++++++++++++++++--------- > arch/x86/kvm/mmu/mmu_internal.h | 5 ++--- > arch/x86/kvm/mmu/tdp_mmu.c | 31 ++++++++++++++++++------------- > 3 files changed, 39 insertions(+), 25 deletions(-) > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > index 99086a684dd2..57c7c52d137a 100644 > --- a/arch/x86/kvm/mmu/mmu.c > +++ b/arch/x86/kvm/mmu/mmu.c > @@ -802,11 +802,8 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) > kvm_flush_remote_tlbs_with_address(kvm, gfn, 1); > } > > -void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, > - bool nx_huge_page_possible) > +void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) > { > - sp->nx_huge_page_disallowed = true; > - > /* > * If it's possible to replace the shadow page with an NX huge page, > * i.e. if the shadow page is the only thing currently preventing KVM > @@ -815,8 +812,7 @@ void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, > * on the list if KVM is reusing an existing shadow page, i.e. if KVM > * links a shadow page at multiple points. 
> */ > - if (!nx_huge_page_possible || > - !list_empty(&sp->possible_nx_huge_page_link)) > + if (!list_empty(&sp->possible_nx_huge_page_link)) > return; > > ++kvm->stat.nx_lpage_splits; > @@ -824,6 +820,15 @@ void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, > &kvm->arch.possible_nx_huge_pages); > } > > +static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, > + bool nx_huge_page_possible) > +{ > + sp->nx_huge_page_disallowed = true; > + > + if (nx_huge_page_possible) > + track_possible_nx_huge_page(kvm, sp); > +} > + > static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) > { > struct kvm_memslots *slots; > @@ -841,10 +846,8 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) > kvm_mmu_gfn_allow_lpage(slot, gfn); > } > > -void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) > +void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) > { > - sp->nx_huge_page_disallowed = false; > - > if (list_empty(&sp->possible_nx_huge_page_link)) > return; > > @@ -852,6 +855,13 @@ void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) > list_del_init(&sp->possible_nx_huge_page_link); > } > > +static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) > +{ > + sp->nx_huge_page_disallowed = false; > + > + untrack_possible_nx_huge_page(kvm, sp); > +} > + > static struct kvm_memory_slot * > gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, > bool no_dirty_log) > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h > index 67879459a25c..22152241bd29 100644 > --- a/arch/x86/kvm/mmu/mmu_internal.h > +++ b/arch/x86/kvm/mmu/mmu_internal.h > @@ -328,8 +328,7 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_ > > void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); > > -void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, > - bool 
nx_huge_page_possible); > -void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); > +void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); > +void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); > > #endif /* __KVM_X86_MMU_INTERNAL_H */ > diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c > index 73eb28ed1f03..059231c82345 100644 > --- a/arch/x86/kvm/mmu/tdp_mmu.c > +++ b/arch/x86/kvm/mmu/tdp_mmu.c > @@ -403,8 +403,11 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp, > lockdep_assert_held_write(&kvm->mmu_lock); > > list_del(&sp->link); > - if (sp->nx_huge_page_disallowed) > - unaccount_nx_huge_page(kvm, sp); > + > + if (sp->nx_huge_page_disallowed) { > + sp->nx_huge_page_disallowed = false; > + untrack_possible_nx_huge_page(kvm, sp); > + } > > if (shared) > spin_unlock(&kvm->arch.tdp_mmu_pages_lock); > @@ -1118,16 +1121,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, > * @kvm: kvm instance > * @iter: a tdp_iter instance currently on the SPTE that should be set > * @sp: The new TDP page table to install. > - * @account_nx: True if this page table is being installed to split a > - * non-executable huge page. > * @shared: This operation is running under the MMU lock in read mode. > * > * Returns: 0 if the new page table was installed. Non-0 if the page table > * could not be installed (e.g. the atomic compare-exchange failed). 
> */ > static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, > - struct kvm_mmu_page *sp, bool account_nx, > - bool shared) > + struct kvm_mmu_page *sp, bool shared) > { > u64 spte = make_nonleaf_spte(sp->spt, !kvm_ad_enabled()); > int ret = 0; > @@ -1142,8 +1142,6 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, > > spin_lock(&kvm->arch.tdp_mmu_pages_lock); > list_add(&sp->link, &kvm->arch.tdp_mmu_pages); > - if (account_nx) > - account_nx_huge_page(kvm, sp, true); > spin_unlock(&kvm->arch.tdp_mmu_pages_lock); > tdp_account_mmu_page(kvm, sp); > > @@ -1157,6 +1155,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, > int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) > { > struct kvm_mmu *mmu = vcpu->arch.mmu; > + struct kvm *kvm = vcpu->kvm; > struct tdp_iter iter; > struct kvm_mmu_page *sp; > int ret; > @@ -1193,9 +1192,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) > } > > if (!is_shadow_present_pte(iter.old_spte)) { > - bool account_nx = fault->huge_page_disallowed && > - fault->req_level >= iter.level; > - > /* > * If SPTE has been frozen by another thread, just > * give up and retry, avoiding unnecessary page table > @@ -1207,10 +1203,19 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) > sp = tdp_mmu_alloc_sp(vcpu); > tdp_mmu_init_child_sp(sp, &iter); > > - if (tdp_mmu_link_sp(vcpu->kvm, &iter, sp, account_nx, true)) { > + sp->nx_huge_page_disallowed = fault->huge_page_disallowed; > + > + if (tdp_mmu_link_sp(kvm, &iter, sp, true)) { > tdp_mmu_free_sp(sp); > break; > } > + > + if (fault->huge_page_disallowed && > + fault->req_level >= iter.level) { > + spin_lock(&kvm->arch.tdp_mmu_pages_lock); > + track_possible_nx_huge_page(kvm, sp); > + spin_unlock(&kvm->arch.tdp_mmu_pages_lock); > + } > } > } > > @@ -1498,7 +1503,7 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter, > * correctness standpoint since 
the translation will be the same either > * way. > */ > - ret = tdp_mmu_link_sp(kvm, iter, sp, false, shared); > + ret = tdp_mmu_link_sp(kvm, iter, sp, shared); > if (ret) > goto out; > > -- > 2.38.0.413.g74048e4d9e-goog >