Fix two intertwined bugs in the NX huge page zapping that were introduced
by the incorporation of the TDP MMU.  Because there is a unified list of
NX huge pages, zapping can encounter both TDP MMU and legacy MMU pages,
and the two MMUs have different tracking for TLB flushing.  If one flavor
needs a flush, but the code for the other flavor yields, KVM will fail to
flush before yielding.

First, honor the "flush needed" return from kvm_tdp_mmu_zap_gfn_range(),
which does the flush itself if and only if it yields, and otherwise
expects the caller to do the flush.  This requires feeding the result
into kvm_mmu_remote_flush_or_zap(), and so also fixes the case where the
TDP MMU needs a flush, the legacy MMU does not, and the main loop yields.

Second, tell the TDP MMU a flush is pending if the list of zapped pages
from legacy MMUs is not empty, i.e. the legacy MMU needs a flush.  This
fixes the case where the TDP MMU yields, but does not itself require a
flush.

Fixes: 29cf0f5007a2 ("kvm: x86/mmu: NX largepage recovery for TDP MMU")
Cc: stable@xxxxxxxxxxxxxxx
Cc: Ben Gardon <bgardon@xxxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
 arch/x86/kvm/mmu/mmu.c     | 15 ++++++++++-----
 arch/x86/kvm/mmu/tdp_mmu.c |  6 +++---
 arch/x86/kvm/mmu/tdp_mmu.h |  3 ++-
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c6ed633594a2..413d6259340e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5517,7 +5517,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 	}
 
 	if (is_tdp_mmu_enabled(kvm)) {
-		flush = kvm_tdp_mmu_zap_gfn_range(kvm, gfn_start, gfn_end);
+		flush = kvm_tdp_mmu_zap_gfn_range(kvm, gfn_start, gfn_end,
+						  false);
 		if (flush)
 			kvm_flush_remote_tlbs(kvm);
 	}
@@ -5939,6 +5940,8 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
 	struct kvm_mmu_page *sp;
 	unsigned int ratio;
 	LIST_HEAD(invalid_list);
+	bool flush = false;
+	gfn_t gfn_end;
 	ulong to_zap;
 
 	rcu_idx = srcu_read_lock(&kvm->srcu);
@@ -5960,19 +5963,21 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
 				      lpage_disallowed_link);
 		WARN_ON_ONCE(!sp->lpage_disallowed);
 		if (is_tdp_mmu_page(sp)) {
-			kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn,
-				sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level));
+			gfn_end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level);
+			flush = kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, gfn_end,
+							  flush || !list_empty(&invalid_list));
 		} else {
 			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
 			WARN_ON_ONCE(sp->lpage_disallowed);
 		}
 
 		if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
-			kvm_mmu_commit_zap_page(kvm, &invalid_list);
+			kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
 			cond_resched_rwlock_write(&kvm->mmu_lock);
+			flush = false;
 		}
 	}
-	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+	kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
 
 	write_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, rcu_idx);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 6cf08c3c537f..367f12bf1026 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -709,10 +709,10 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
  * SPTEs have been cleared and a TLB flush is needed before releasing the
  * MMU lock.
  */
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
+bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+			       bool flush)
 {
 	struct kvm_mmu_page *root;
-	bool flush = false;
 
 	for_each_tdp_mmu_root_yield_safe(kvm, root)
 		flush = zap_gfn_range(kvm, root, start, end, true, flush);
@@ -725,7 +725,7 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
 	gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
 	bool flush;
 
-	flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);
+	flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn, false);
 	if (flush)
 		kvm_flush_remote_tlbs(kvm);
 }
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 3b761c111bff..e39bee52d49e 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -8,7 +8,8 @@
 hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
 void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root);
 
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end);
+bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+			       bool flush);
 void kvm_tdp_mmu_zap_all(struct kvm *kvm);
 
 int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
-- 
2.31.0.rc2.261.g7f71774620-goog
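
(Not part of the patch: the sketch below is a minimal, self-contained model of
the flush-propagation pattern described in the changelog.  The names
need_yield(), zap_tdp_range(), zap_legacy_page() and commit_or_flush() are made
up for illustration and are not KVM APIs; the only point is how the
pending-flush state is fed into the TDP-flavor zap, honored before every
yield, and cleared afterwards.)

/*
 * Not KVM code: a toy model of the flush-propagation pattern, with
 * hypothetical names.  Each zap flavor may leave a TLB flush pending,
 * and the pending state must be honored before any yield.
 */
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for need_resched()/rwlock_needbreak(). */
static bool need_yield(int i)
{
	return (i % 3) == 2;
}

/* TDP-flavor zap: takes the caller's pending flush, returns the new state. */
static bool zap_tdp_range(int i, bool flush)
{
	printf("zap TDP page %d (flush already pending: %d)\n", i, flush);
	return true;	/* this flavor left SPTEs that need a flush */
}

/* Legacy-flavor zap: queues the page on a deferred (invalid) list. */
static void zap_legacy_page(int i, int *list_len)
{
	printf("zap legacy page %d\n", i);
	(*list_len)++;
}

/* Flush if either flavor needs it, then drain the deferred list. */
static void commit_or_flush(int *list_len, bool flush)
{
	if (*list_len || flush)
		printf("TLB flush (deferred list: %d, tdp flush: %d)\n",
		       *list_len, flush);
	*list_len = 0;
}

int main(void)
{
	int list_len = 0;
	bool flush = false;
	int i;

	for (i = 0; i < 6; i++) {
		if (i & 1)
			/* Tell the TDP flavor a flush is already pending. */
			flush = zap_tdp_range(i, flush || list_len);
		else
			zap_legacy_page(i, &list_len);

		if (need_yield(i)) {
			/* Flush before yielding, then clear the state. */
			commit_or_flush(&list_len, flush);
			flush = false;
		}
	}
	commit_or_flush(&list_len, flush);
	return 0;
}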