Protect the contents of the TDP MMU roots list with RCU in preparation
for a future patch which will allow the iterator macro to be used under
the MMU lock in read mode.

Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
---

Changelog v2:
-- add lockdep condition for tdp_mmu_pages_lock to for_each_tdp_mmu_root
-- fix problem with unexported lockdep function
-- updated comments in kvm_host.h

 arch/x86/include/asm/kvm_host.h | 21 +++++++---
 arch/x86/kvm/mmu/tdp_mmu.c      | 69 +++++++++++++++++++--------------
 2 files changed, 55 insertions(+), 35 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 99778ac51243..e02e8b8a875b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1050,25 +1050,36 @@ struct kvm_arch {
 	bool tdp_mmu_enabled;
 
 	/*
-	 * List of struct kvmp_mmu_pages being used as roots.
+	 * List of struct kvm_mmu_pages being used as roots.
 	 * All struct kvm_mmu_pages in the list should have
 	 * tdp_mmu_page set.
-	 * All struct kvm_mmu_pages in the list should have a positive
-	 * root_count except when a thread holds the MMU lock and is removing
-	 * an entry from the list.
+	 *
+	 * For reads, this list is protected by:
+	 *	the MMU lock in read mode + RCU or
+	 *	the MMU lock in write mode
+	 *
+	 * For writes, this list is protected by:
+	 *	the MMU lock in read mode + the tdp_mmu_pages_lock or
+	 *	the MMU lock in write mode
+	 *
+	 * Roots will remain in the list until their tdp_mmu_root_count
+	 * drops to zero, at which point the thread that decremented the
+	 * count to zero should remove the root from the list and clean
+	 * it up, freeing the root after an RCU grace period.
 	 */
 	struct list_head tdp_mmu_roots;
 
 	/*
 	 * List of struct kvmp_mmu_pages not being used as roots.
 	 * All struct kvm_mmu_pages in the list should have
-	 * tdp_mmu_page set and a root_count of 0.
+	 * tdp_mmu_page set and a tdp_mmu_root_count of 0.
 	 */
 	struct list_head tdp_mmu_pages;
 
 	/*
 	 * Protects accesses to the following fields when the MMU lock
 	 * is held in read mode:
+	 *  - tdp_mmu_roots (above)
 	 *  - tdp_mmu_pages (above)
 	 *  - the link field of struct kvm_mmu_pages used by the TDP MMU
 	 *  - lpage_disallowed_mmu_pages
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 886bc170f2a5..c1d7f6b86870 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -50,6 +50,22 @@ static void tdp_mmu_free_sp(struct kvm_mmu_page *sp)
 	kmem_cache_free(mmu_page_header_cache, sp);
 }
 
+/*
+ * This is called through call_rcu in order to free TDP page table memory
+ * safely with respect to other kernel threads that may be operating on
+ * the memory.
+ * By only accessing TDP MMU page table memory in an RCU read critical
+ * section, and freeing it after a grace period, lockless access to that
+ * memory won't use it after it is freed.
+ */
+static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
+{
+	struct kvm_mmu_page *sp = container_of(head, struct kvm_mmu_page,
+					       rcu_head);
+
+	tdp_mmu_free_sp(sp);
+}
+
 void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
 {
 	gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
@@ -61,11 +77,13 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
 
 	WARN_ON(!root->tdp_mmu_page);
 
-	list_del(&root->link);
+	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
+	list_del_rcu(&root->link);
+	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 
 	zap_gfn_range(kvm, root, 0, max_gfn, false, false);
 
-	tdp_mmu_free_sp(root);
+	call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback);
 }
 
 /*
@@ -82,18 +100,21 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
 
 	lockdep_assert_held_write(&kvm->mmu_lock);
 
+	rcu_read_lock();
+
 	if (prev_root)
-		next_root = list_next_entry(prev_root, link);
+		next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots,
+						  &prev_root->link,
+						  typeof(*prev_root), link);
 	else
-		next_root = list_first_entry(&kvm->arch.tdp_mmu_roots,
-					     typeof(*next_root), link);
+		next_root = list_first_or_null_rcu(&kvm->arch.tdp_mmu_roots,
+						   typeof(*next_root), link);
 
-	while (!list_entry_is_head(next_root, &kvm->arch.tdp_mmu_roots, link) &&
-	       !kvm_tdp_mmu_get_root(kvm, next_root))
-		next_root = list_next_entry(next_root, link);
+	while (next_root && !kvm_tdp_mmu_get_root(kvm, next_root))
+		next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots,
+				&next_root->link, typeof(*next_root), link);
 
-	if (list_entry_is_head(next_root, &kvm->arch.tdp_mmu_roots, link))
-		next_root = NULL;
+	rcu_read_unlock();
 
 	if (prev_root)
 		kvm_tdp_mmu_put_root(kvm, prev_root);
@@ -107,15 +128,17 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
  * if exiting the loop early, the caller must drop the reference to the most
  * recent root. (Unless keeping a live reference is desirable.)
  */
-#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id)	\
+#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id)		\
 	for (_root = tdp_mmu_next_root(_kvm, NULL);		\
 	     _root;						\
 	     _root = tdp_mmu_next_root(_kvm, _root))		\
 		if (kvm_mmu_page_as_id(_root) != _as_id) {	\
 		} else
 
-#define for_each_tdp_mmu_root(_kvm, _root, _as_id)			\
-	list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)	\
+#define for_each_tdp_mmu_root(_kvm, _root, _as_id)			\
+	list_for_each_entry_rcu(_root, &_kvm->arch.tdp_mmu_roots, link,	\
+				lockdep_is_held_type(&kvm->mmu_lock, 0) ||	\
+				lockdep_is_held(&kvm->arch.tdp_mmu_pages_lock))	\
 		if (kvm_mmu_page_as_id(_root) != _as_id) {		\
 		} else
 
@@ -171,28 +194,14 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
 	root = alloc_tdp_mmu_page(vcpu, 0, vcpu->arch.mmu->shadow_root_level);
 	refcount_set(&root->tdp_mmu_root_count, 1);
 
-	list_add(&root->link, &kvm->arch.tdp_mmu_roots);
+	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
+	list_add_rcu(&root->link, &kvm->arch.tdp_mmu_roots);
+	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 
 out:
 	return __pa(root->spt);
 }
 
-/*
- * This is called through call_rcu in order to free TDP page table memory
- * safely with respect to other kernel threads that may be operating on
- * the memory.
- * By only accessing TDP MMU page table memory in an RCU read critical
- * section, and freeing it after a grace period, lockless access to that
- * memory won't use it after it is freed.
- */
-static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
-{
-	struct kvm_mmu_page *sp = container_of(head, struct kvm_mmu_page,
-					       rcu_head);
-
-	tdp_mmu_free_sp(sp);
-}
-
 static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 				u64 old_spte, u64 new_spte, int level,
 				bool shared);
-- 
2.31.0.208.g409f899ff0-goog
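
For readers who want the locking rules from the kvm_host.h comment in one
place, the sketch below shows the generic pattern this patch applies to
tdp_mmu_roots: writers serialize on a spinlock and mutate the list with the
_rcu list helpers, readers walk the list inside an RCU read-side critical
section, and an unlinked entry is freed only after a grace period via
call_rcu. This is an illustrative, kernel-style fragment and not part of the
patch; the demo_* names are made up, and only primitives already used above
(list_add_rcu, list_del_rcu, list_for_each_entry_rcu, call_rcu,
rcu_read_lock/unlock) are assumed.

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct demo_root {
	struct list_head link;
	struct rcu_head rcu_head;
};

static LIST_HEAD(demo_roots);
static DEFINE_SPINLOCK(demo_lock);	/* plays the role of tdp_mmu_pages_lock */

static void demo_free_cb(struct rcu_head *head)
{
	/* Runs after a grace period, so no RCU reader can still see the entry. */
	kfree(container_of(head, struct demo_root, rcu_head));
}

static void demo_publish(struct demo_root *r)
{
	/* Writer side: list mutation is serialized on the spinlock. */
	spin_lock(&demo_lock);
	list_add_rcu(&r->link, &demo_roots);
	spin_unlock(&demo_lock);
}

static void demo_unpublish(struct demo_root *r)
{
	spin_lock(&demo_lock);
	list_del_rcu(&r->link);
	spin_unlock(&demo_lock);

	/* Defer the free until pre-existing readers have finished. */
	call_rcu(&r->rcu_head, demo_free_cb);
}

static void demo_walk(void)
{
	struct demo_root *r;

	/* Reader side: no list lock, only an RCU read-side critical section. */
	rcu_read_lock();
	list_for_each_entry_rcu(r, &demo_roots, link) {
		/* r may be unlinked concurrently, but it is not freed while here. */
	}
	rcu_read_unlock();
}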