To avoid saddling a vCPU thread with the work of tearing down an entire
paging structure, take a reference on each root before it becomes
obsolete, so that the thread initiating the fast invalidation can tear
down the paging structure and (most likely) release the last reference.
As a bonus, this teardown can happen under the MMU lock in read mode so
as not to block the progress of vCPU threads.

Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
---
 arch/x86/kvm/mmu/mmu.c     |  6 ++++
 arch/x86/kvm/mmu/tdp_mmu.c | 74 +++++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/mmu/tdp_mmu.h |  1 +
 3 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 49b7097fb55b..22742619698d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5455,6 +5455,12 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
 	kvm_zap_obsolete_pages(kvm);
 
 	write_unlock(&kvm->mmu_lock);
+
+	if (is_tdp_mmu_enabled(kvm)) {
+		read_lock(&kvm->mmu_lock);
+		kvm_tdp_mmu_zap_all_fast(kvm);
+		read_unlock(&kvm->mmu_lock);
+	}
 }
 
 static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 428ff6778426..5498df7e2e1f 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -794,13 +794,85 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
  * kvm_reload_remote_mmus. Since this is in the same critical section, no new
  * roots will be created between this function and the MMU reload signals
  * being sent.
+ * Take a reference on all roots so that this thread can do the bulk of
+ * the work required to free the roots once they are invalidated. Without
+ * this reference, a vCPU thread might drop the last reference to a root
+ * and get stuck with tearing down the entire paging structure.
  */
 void kvm_tdp_mmu_invalidate_roots(struct kvm *kvm)
 {
 	struct kvm_mmu_page *root;
 
 	for_each_tdp_mmu_root(kvm, root)
-		root->role.invalid = true;
+		if (refcount_inc_not_zero(&root->tdp_mmu_root_count))
+			root->role.invalid = true;
+}
+
+static struct kvm_mmu_page *next_invalidated_root(struct kvm *kvm,
+						  struct kvm_mmu_page *prev_root)
+{
+	struct kvm_mmu_page *next_root;
+
+	if (prev_root)
+		next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots,
+						  &prev_root->link,
+						  typeof(*prev_root), link);
+	else
+		next_root = list_first_or_null_rcu(&kvm->arch.tdp_mmu_roots,
+						   typeof(*next_root), link);
+
+	while (next_root && !(next_root->role.invalid &&
+			      refcount_read(&next_root->tdp_mmu_root_count)))
+		next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots,
+						  &next_root->link,
+						  typeof(*next_root), link);
+
+	return next_root;
+}
+
+/*
+ * Since kvm_tdp_mmu_invalidate_roots has acquired a reference to each
+ * invalidated root, they will not be freed until this function drops the
+ * reference. Before dropping that reference, tear down the paging
+ * structure so that whichever thread does drop the last reference
+ * only has to do a trivial amount of work. Since the roots are invalid,
+ * no new SPTEs should be created under them.
+ */
+void kvm_tdp_mmu_zap_all_fast(struct kvm *kvm)
+{
+	gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
+	struct kvm_mmu_page *next_root;
+	struct kvm_mmu_page *root;
+	bool flush = false;
+
+	lockdep_assert_held_read(&kvm->mmu_lock);
+
+	rcu_read_lock();
+
+	root = next_invalidated_root(kvm, NULL);
+
+	while (root) {
+		next_root = next_invalidated_root(kvm, root);
+
+		rcu_read_unlock();
+
+		flush |= zap_gfn_range(kvm, root, 0, max_gfn, true, true);
+
+		/*
+		 * Put the reference acquired in
+		 * kvm_tdp_mmu_invalidate_roots
+		 */
+		kvm_tdp_mmu_put_root(kvm, root, true);
+
+		root = next_root;
+
+		rcu_read_lock();
+	}
+
+	rcu_read_unlock();
+
+	if (flush)
+		kvm_flush_remote_tlbs(kvm);
 }
 
 /*
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index ff4978817fb8..d6d98f9047cd 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -24,6 +24,7 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
 
 void kvm_tdp_mmu_zap_all(struct kvm *kvm);
 void kvm_tdp_mmu_invalidate_roots(struct kvm *kvm);
+void kvm_tdp_mmu_zap_all_fast(struct kvm *kvm);
 
 int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 		    int map_writable, int max_level, kvm_pfn_t pfn,
-- 
2.31.0.291.g576ba9dcdaf-goog
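
The crux of the patch is the reference/invalidate hand-off: take a
reference on a root first, then mark it invalid, so that a vCPU thread
dropping its own reference is never the last holder and the expensive
teardown lands on the invalidating thread instead. Below is a minimal,
self-contained userspace sketch of that pattern, not kernel code: C11
atomics stand in for the kernel's refcount_t, and every name in it
(struct root, invalidate_root, put_root, tear_down_paging_structure) is
illustrative rather than KVM's.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct root {
	atomic_int refcount;
	bool invalid;
};

/* The expensive work a vCPU thread should not be saddled with. */
static void tear_down_paging_structure(struct root *root)
{
	printf("tearing down root %p\n", (void *)root);
}

static void put_root(struct root *root)
{
	/* Whoever drops the count to zero pays for the teardown. */
	if (atomic_fetch_sub(&root->refcount, 1) == 1) {
		tear_down_paging_structure(root);
		free(root);
	}
}

/*
 * Userspace analogue of refcount_inc_not_zero() followed by setting
 * role.invalid: only pin and invalidate a root that is not already on
 * its way to being freed.
 */
static bool invalidate_root(struct root *root)
{
	int old = atomic_load(&root->refcount);

	while (old != 0)
		if (atomic_compare_exchange_weak(&root->refcount, &old,
						 old + 1)) {
			root->invalid = true;
			return true;
		}

	return false;
}

int main(void)
{
	struct root *root = malloc(sizeof(*root));

	atomic_init(&root->refcount, 1);	/* the vCPU's reference */
	root->invalid = false;

	if (invalidate_root(root)) {		/* invalidator's reference */
		put_root(root);	/* vCPU drops: cheap, count 2 -> 1 */
		put_root(root);	/* invalidator drops: 1 -> 0, tears down */
	}

	return 0;
}

Once invalidate_root() succeeds, the vCPU's put_root() can only move
the count from two to one, so in this sketch the teardown is guaranteed
to run on the invalidating thread, which is exactly the property the
commit message is after.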
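The zap loop itself has a distinctive shape: look up the next
invalidated root while inside the RCU read-side critical section, leave
the critical section for the long-running zap, then re-enter it before
advancing. A toy single-threaded analogue of that shape follows, with a
plain mutex standing in for rcu_read_lock()/rcu_read_unlock(); again
purely illustrative, and note that in the real patch it is the
reference taken at invalidation time, not the lock, that keeps root and
next_root alive across the unlocked stretch.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;
	int id;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *head;

/* Long-running per-root work, done with the list lock dropped so that
 * other threads could keep making progress in the meantime. */
static void zap(struct node *n)
{
	printf("zapping root %d\n", n->id);
}

int main(void)
{
	struct node *n;
	int i;

	/* Build a three-node list standing in for tdp_mmu_roots. */
	for (i = 3; i >= 1; i--) {
		n = malloc(sizeof(*n));
		n->id = i;
		n->next = head;
		head = n;
	}

	pthread_mutex_lock(&list_lock);
	n = head;
	while (n) {
		/* Grab the successor before unlocking; in the kernel the
		 * elevated refcount is what keeps it from being freed. */
		struct node *next = n->next;

		pthread_mutex_unlock(&list_lock);
		zap(n);			/* lock not held here */
		free(n);		/* "last reference" dropped */
		pthread_mutex_lock(&list_lock);

		n = next;
	}
	head = NULL;
	pthread_mutex_unlock(&list_lock);

	return 0;
}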