On Tue, Mar 28, 2023 at 05:21:29PM -0700, David Matlack wrote:
> On Mon, Mar 06, 2023 at 02:41:25PM -0800, Vipin Sharma wrote:
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 64de083cd6b9..77d3aa368e5e 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -787,7 +787,7 @@ struct kvm_vcpu_arch {
> >  	struct kvm_mmu *walk_mmu;
> > 
> >  	struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
> > -	struct kvm_mmu_memory_cache mmu_shadow_page_cache;
> > +	struct kvm_mmu_memory_cache mmu_shadow_page_cache[MAX_NUMNODES];
> 
> I think we need an abstraction for a NUMA-aware mmu cache, since there
> is more than one by the end of this series.
> 
> e.g. A wrapper struct (struct kvm_mmu_numa_memory_cache) or make
> NUMA-awareness an optional feature within kvm_mmu_memory_cache, plus
> common helper functions for operations like initializing, topping-up,
> and freeing.
> 
> I have some ideas I want to try but I ran out of time today.

Something like this (compile-tested only, applies on top of this series):

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 041302d6132c..b44f867d0ed2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -787,7 +787,7 @@ struct kvm_vcpu_arch {
 	struct kvm_mmu *walk_mmu;
 
 	struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
-	struct kvm_mmu_memory_cache mmu_shadow_page_cache[MAX_NUMNODES];
+	struct kvm_mmu_numa_memory_cache mmu_shadow_page_cache;
 	struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
 	struct kvm_mmu_memory_cache mmu_page_header_cache;
 
@@ -1453,7 +1453,7 @@ struct kvm_arch {
 	 *
 	 * Protected by kvm->slots_lock.
 	 */
-	struct kvm_mmu_memory_cache split_shadow_page_cache[MAX_NUMNODES];
+	struct kvm_mmu_numa_memory_cache split_shadow_page_cache;
 	struct kvm_mmu_memory_cache split_page_header_cache;
 
 	/*
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 5463ce6e52fa..fb7b3932f08d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -702,7 +702,7 @@ static void mmu_free_sp_memory_cache(struct kvm_mmu_memory_cache *cache)
 
 static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
 {
-	int r, nid = KVM_MMU_DEFAULT_CACHE_INDEX;
+	int r;
 
 	/* 1 rmap, 1 parent PTE per level, and the prefetched rmaps. */
 	r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
@@ -710,16 +710,8 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
 	if (r)
 		return r;
 
-	if (kvm_numa_aware_page_table_enabled(vcpu->kvm)) {
-		for_each_online_node(nid) {
-			r = mmu_topup_sp_memory_cache(&vcpu->arch.mmu_shadow_page_cache[nid],
-						      KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE);
-		}
-	} else {
-		r = mmu_topup_sp_memory_cache(&vcpu->arch.mmu_shadow_page_cache[nid],
-					      KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE);
-	}
-
+	r = kvm_mmu_topup_numa_memory_cache(&vcpu->arch.mmu_shadow_page_cache,
+					    KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE);
 	if (r)
 		return r;
@@ -735,12 +727,9 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
-	int nid;
-
 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
 
 	mutex_lock(&vcpu->arch.mmu_shadow_page_cache_lock);
-	for_each_node(nid)
-		mmu_free_sp_memory_cache(&vcpu->arch.mmu_shadow_page_cache[nid]);
+	kvm_mmu_free_numa_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
 	mmu_free_sp_memory_cache(&vcpu->arch.mmu_shadowed_info_cache);
 	mutex_unlock(&vcpu->arch.mmu_shadow_page_cache_lock);
 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
@@ -2262,7 +2251,7 @@ static struct kvm_mmu_page *kvm_mmu_get_shadow_page(struct kvm_vcpu *vcpu,
 {
 	struct shadow_page_caches caches = {
 		.page_header_cache = &vcpu->arch.mmu_page_header_cache,
-		.shadow_page_cache = &vcpu->arch.mmu_shadow_page_cache[nid],
+		.shadow_page_cache = &vcpu->arch.mmu_shadow_page_cache.nodes[nid],
 		.shadowed_info_cache = &vcpu->arch.mmu_shadowed_info_cache,
 	};
@@ -5977,7 +5966,7 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
-	int ret, nid;
+	int ret;
 
 	INIT_KVM_MMU_MEMORY_CACHE(&vcpu->arch.mmu_pte_list_desc_cache);
 	vcpu->arch.mmu_pte_list_desc_cache.kmem_cache = pte_list_desc_cache;
@@ -5985,11 +5974,9 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
 	INIT_KVM_MMU_MEMORY_CACHE(&vcpu->arch.mmu_page_header_cache);
 	vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache;
 
-	for_each_node(nid) {
-		INIT_KVM_MMU_MEMORY_CACHE(&vcpu->arch.mmu_shadow_page_cache[nid]);
-		if (kvm_numa_aware_page_table_enabled(vcpu->kvm))
-			vcpu->arch.mmu_shadow_page_cache[nid].node = nid;
-	}
+	kvm_mmu_init_numa_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
+	if (kvm_numa_aware_page_table_enabled(vcpu->kvm))
+		kvm_mmu_enable_numa_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
 
 	mutex_init(&vcpu->arch.mmu_shadow_page_cache_lock);
@@ -6140,7 +6127,7 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
 int kvm_mmu_init_vm(struct kvm *kvm)
 {
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
-	int r, nid;
+	int r;
 
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
@@ -6159,9 +6146,7 @@ int kvm_mmu_init_vm(struct kvm *kvm)
 	INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_page_header_cache);
 	kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
 
-	for_each_node(nid)
-		INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache[nid]);
-
+	kvm_mmu_init_numa_memory_cache(&kvm->arch.split_shadow_page_cache);
 	INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_desc_cache);
 	kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache;
@@ -6171,13 +6156,10 @@ static void mmu_free_vm_memory_caches(struct kvm *kvm)
 {
-	int nid;
-
 	kvm_mmu_free_memory_cache(&kvm->arch.split_desc_cache);
 	kvm_mmu_free_memory_cache(&kvm->arch.split_page_header_cache);
 
 	mutex_lock(&kvm->slots_lock);
-	for_each_node(nid)
-		mmu_free_sp_memory_cache(&kvm->arch.split_shadow_page_cache[nid]);
+	kvm_mmu_free_numa_memory_cache(&kvm->arch.split_shadow_page_cache);
 	mutex_unlock(&kvm->slots_lock);
 }
@@ -6299,7 +6281,7 @@ static bool need_topup_split_caches_or_resched(struct kvm *kvm, int nid)
 	 */
 	return need_topup(&kvm->arch.split_desc_cache, SPLIT_DESC_CACHE_MIN_NR_OBJECTS) ||
 	       need_topup(&kvm->arch.split_page_header_cache, 1) ||
-	       need_topup(&kvm->arch.split_shadow_page_cache[nid], 1);
+	       need_topup(&kvm->arch.split_shadow_page_cache.nodes[nid], 1);
 }
 
 static int topup_split_caches(struct kvm *kvm, int nid)
@@ -6332,7 +6314,7 @@ static int topup_split_caches(struct kvm *kvm, int nid)
 	if (r)
 		return r;
 
-	return mmu_topup_sp_memory_cache(&kvm->arch.split_shadow_page_cache[nid], 1);
+	return mmu_topup_sp_memory_cache(&kvm->arch.split_shadow_page_cache.nodes[nid], 1);
 }
 
 static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *huge_sptep,
@@ -6357,7 +6339,7 @@ static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *huge_sptep,
 
 	/* Direct SPs do not require a shadowed_info_cache. */
 	caches.page_header_cache = &kvm->arch.split_page_header_cache;
-	caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache[nid];
+	caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache.nodes[nid];
 
 	/* Safe to pass NULL for vCPU since requesting a direct SP. */
 	return __kvm_mmu_get_shadow_page(kvm, NULL, &caches, gfn, role);
@@ -6760,7 +6742,7 @@ static unsigned long mmu_shrink_scan(struct shrinker *shrink,
 		list_move_tail(&kvm->vm_list, &vm_list);
 
 		kvm_for_each_vcpu(i, vcpu, kvm) {
-			freed += mmu_memory_cache_try_empty(vcpu->arch.mmu_shadow_page_cache,
+			freed += mmu_memory_cache_try_empty(vcpu->arch.mmu_shadow_page_cache.nodes,
 							    MAX_NUMNODES,
 							    &vcpu->arch.mmu_shadow_page_cache_lock);
 			freed += mmu_memory_cache_try_empty(&vcpu->arch.mmu_shadowed_info_cache,
@@ -6769,7 +6751,7 @@ static unsigned long mmu_shrink_scan(struct shrinker *shrink,
 			if (freed >= sc->nr_to_scan)
 				goto out;
 		}
-		freed += mmu_memory_cache_try_empty(kvm->arch.split_shadow_page_cache,
+		freed += mmu_memory_cache_try_empty(kvm->arch.split_shadow_page_cache.nodes,
 						    MAX_NUMNODES, &kvm->slots_lock);
 		if (freed >= sc->nr_to_scan)
 			goto out;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 63113a66f560..721d5a415807 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -265,7 +265,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu, int nid)
 	struct kvm_mmu_page *sp;
 
 	sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
-	sp->spt = mmu_sp_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache[nid]);
+	sp->spt = mmu_sp_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache.nodes[nid]);
 
 	return sp;
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d8ea39b248cd..940099629626 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6176,7 +6176,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 			    struct kvm_enable_cap *cap)
 {
-	int r, nid;
+	int r;
 
 	if (cap->flags)
 		return -EINVAL;
@@ -6399,9 +6399,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		kvm->arch.numa_aware_page_table = true;
 
 		mutex_lock(&kvm->slots_lock);
-		for_each_node(nid) {
-			kvm->arch.split_shadow_page_cache[nid].node = nid;
-		}
+		kvm_mmu_enable_numa_memory_cache(&kvm->arch.split_shadow_page_cache);
 		mutex_unlock(&kvm->slots_lock);
 		r = 0;
 	}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 31586a65e346..d5d966e4a8bf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1365,6 +1365,11 @@ int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc);
 void kvm_mmu_empty_memory_cache(struct kvm_mmu_memory_cache *mc);
 void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
 void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
+
+void kvm_mmu_init_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache);
+void kvm_mmu_enable_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache);
+int kvm_mmu_topup_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache, int min);
+void kvm_mmu_free_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache);
 #endif
 
 void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 13032da2ddfc..7a58ea37bc15 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -101,6 +101,10 @@ struct kvm_mmu_memory_cache {
 	int node;
 };
 
+struct kvm_mmu_numa_memory_cache {
+	struct kvm_mmu_memory_cache nodes[MAX_NUMNODES];
+};
+
 #define KVM_MMU_MEMORY_CACHE_INIT() {	\
 	.gfp_zero = __GFP_ZERO,		\
 	.node = NUMA_NO_NODE,		\
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 25a549705c8e..2607b546c3c9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -476,6 +476,43 @@ void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	BUG_ON(!p);
 	return p;
 }
+
+void kvm_mmu_init_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache)
+{
+	int node;
+
+	for_each_node(node)
+		INIT_KVM_MMU_MEMORY_CACHE(&cache->nodes[node]);
+}
+
+void kvm_mmu_enable_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache)
+{
+	int node;
+
+	for_each_node(node)
+		cache->nodes[node].node = node;
+}
+
+int kvm_mmu_topup_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache, int min)
+{
+	int r, node;
+
+	for_each_online_node(node) {
+		r = kvm_mmu_topup_memory_cache(&cache->nodes[node], min);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+void kvm_mmu_free_numa_memory_cache(struct kvm_mmu_numa_memory_cache *cache)
+{
+	int node;
+
+	for_each_node(node)
+		kvm_mmu_free_memory_cache(&cache->nodes[node]);
+}
 #endif
 
 static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
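
For anyone skimming, the wrapper's lifecycle ends up being: init -> enable
(only if the VM opted in) -> topup -> alloc from a specific node's cache ->
free. A minimal usage sketch, not part of the patch (the example_* names
below are made up for illustration; everything else is defined in the hunks
above or elsewhere in this series):

/* Hypothetical caller, e.g. a per-VM cache. */
struct kvm_mmu_numa_memory_cache example_cache;

static int example_setup(struct kvm *kvm)
{
	/* Set every per-node cache to defaults (node = NUMA_NO_NODE). */
	kvm_mmu_init_numa_memory_cache(&example_cache);

	/* Pin each per-node cache to its node only if the VM opted in. */
	if (kvm_numa_aware_page_table_enabled(kvm))
		kvm_mmu_enable_numa_memory_cache(&example_cache);

	/* Pre-fill every online node's cache; stops at the first failure. */
	return kvm_mmu_topup_numa_memory_cache(&example_cache, 1);
}

static void example_teardown(void)
{
	kvm_mmu_free_numa_memory_cache(&example_cache);
}

Allocation then indexes the per-node member directly, e.g.
kvm_mmu_memory_cache_alloc(&example_cache.nodes[nid]), which is exactly what
the mmu.c and tdp_mmu.c hunks above do.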