On Wed, Dec 21, 2022 at 2:24 PM Ben Gardon <bgardon@xxxxxxxxxx> wrote:
>
> The MMU shrinker currently only operates on the Shadow MMU, but having
> the entire implemenatation in shadow_mmu.c is awkward since much of the
> function isn't Shadow MMU specific. There has also been talk of changing the
> target of the shrinker to the MMU caches rather than already allocated page
> tables. As a result, it makes sense to move some of the implementation back
> to mmu.c.
>
> No functional change intended.
>
> Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
> ---
>  arch/x86/kvm/mmu/mmu.c        | 43 ++++++++++++++++++++++++
>  arch/x86/kvm/mmu/shadow_mmu.c | 62 ++++++++---------------------------
>  arch/x86/kvm/mmu/shadow_mmu.h |  3 +-
>  3 files changed, 58 insertions(+), 50 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index dd97e346c786..4c45a5b63356 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3147,6 +3147,49 @@ static unsigned long mmu_shrink_count(struct shrinker *shrink,
>          return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
>  }
>
> +unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> +        struct kvm *kvm;
> +        int nr_to_scan = sc->nr_to_scan;
> +        unsigned long freed = 0;
> +
> +        mutex_lock(&kvm_lock);
> +
> +        list_for_each_entry(kvm, &vm_list, vm_list) {
> +                /*
> +                 * Never scan more than sc->nr_to_scan VM instances.
> +                 * Will not hit this condition practically since we do not try
> +                 * to shrink more than one VM and it is very unlikely to see
> +                 * !n_used_mmu_pages so many times.
> +                 */
> +                if (!nr_to_scan--)
> +                        break;
> +
> +                /*
> +                 * n_used_mmu_pages is accessed without holding kvm->mmu_lock
> +                 * here. We may skip a VM instance errorneosly, but we do not
> +                 * want to shrink a VM that only started to populate its MMU
> +                 * anyway.
> +                 */
> +                if (!kvm->arch.n_used_mmu_pages &&
> +                    !kvm_shadow_mmu_has_zapped_obsolete_pages(kvm))
> +                        continue;
> +
> +                freed = kvm_shadow_mmu_shrink_scan(kvm, sc->nr_to_scan);
> +
> +                /*
> +                 * unfair on small ones
> +                 * per-vm shrinkers cry out
> +                 * sadness comes quickly
> +                 */
> +                list_move_tail(&kvm->vm_list, &vm_list);
> +                break;
> +        }
> +
> +        mutex_unlock(&kvm_lock);
> +        return freed;
> +}
> +
>  static struct shrinker mmu_shrinker = {
>          .count_objects = mmu_shrink_count,
>          .scan_objects = mmu_shrink_scan,
> diff --git a/arch/x86/kvm/mmu/shadow_mmu.c b/arch/x86/kvm/mmu/shadow_mmu.c
> index 090b4788f7de..1259c4a3b140 100644
> --- a/arch/x86/kvm/mmu/shadow_mmu.c
> +++ b/arch/x86/kvm/mmu/shadow_mmu.c
> @@ -3147,7 +3147,7 @@ void kvm_zap_obsolete_pages(struct kvm *kvm)
>          kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
>  }
>
> -static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
> +bool kvm_shadow_mmu_has_zapped_obsolete_pages(struct kvm *kvm)

Function renaming and removing static should be two separate commits.
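E.g. (just a sketch of the split, not the exact diff), the commit that moves
the shrinker into mmu.c could limit itself to dropping static and exporting
the helper under its current name:

-static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
+bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)

plus the matching declaration in shadow_mmu.h:

+bool kvm_has_zapped_obsolete_pages(struct kvm *kvm);

and then a follow-up commit could do the rename to
kvm_shadow_mmu_has_zapped_obsolete_pages(), updating the callers in mmu.c and
shadow_mmu.c in the same patch.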
> {
>          return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
>  }
> @@ -3416,60 +3416,24 @@ void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
>          kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
>  }
>
> -unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
> +unsigned long kvm_shadow_mmu_shrink_scan(struct kvm *kvm, int pages_to_free)
>  {
> -        struct kvm *kvm;
> -        int nr_to_scan = sc->nr_to_scan;
>          unsigned long freed = 0;
> +        int idx;
>
> -        mutex_lock(&kvm_lock);
> -
> -        list_for_each_entry(kvm, &vm_list, vm_list) {
> -                int idx;
> -                LIST_HEAD(invalid_list);
> -
> -                /*
> -                 * Never scan more than sc->nr_to_scan VM instances.
> -                 * Will not hit this condition practically since we do not try
> -                 * to shrink more than one VM and it is very unlikely to see
> -                 * !n_used_mmu_pages so many times.
> -                 */
> -                if (!nr_to_scan--)
> -                        break;
> -                /*
> -                 * n_used_mmu_pages is accessed without holding kvm->mmu_lock
> -                 * here. We may skip a VM instance errorneosly, but we do not
> -                 * want to shrink a VM that only started to populate its MMU
> -                 * anyway.
> -                 */
> -                if (!kvm->arch.n_used_mmu_pages &&
> -                    !kvm_has_zapped_obsolete_pages(kvm))
> -                        continue;
> -
> -                idx = srcu_read_lock(&kvm->srcu);
> -                write_lock(&kvm->mmu_lock);
> -
> -                if (kvm_has_zapped_obsolete_pages(kvm)) {
> -                        kvm_mmu_commit_zap_page(kvm,
> -                                                &kvm->arch.zapped_obsolete_pages);
> -                        goto unlock;
> -                }
> +        idx = srcu_read_lock(&kvm->srcu);
> +        write_lock(&kvm->mmu_lock);
>
> -                freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan);
> +        if (kvm_shadow_mmu_has_zapped_obsolete_pages(kvm)) {
> +                kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
> +                goto out;
> +        }
>
> -unlock:
> -                write_unlock(&kvm->mmu_lock);
> -                srcu_read_unlock(&kvm->srcu, idx);
> +        freed = kvm_mmu_zap_oldest_mmu_pages(kvm, pages_to_free);
>
> -                /*
> -                 * unfair on small ones
> -                 * per-vm shrinkers cry out
> -                 * sadness comes quickly
> -                 */
> -                list_move_tail(&kvm->vm_list, &vm_list);
> -                break;
> -        }
> +out:
> +        write_unlock(&kvm->mmu_lock);
> +        srcu_read_unlock(&kvm->srcu, idx);
>
> -        mutex_unlock(&kvm_lock);
>          return freed;
>  }
> diff --git a/arch/x86/kvm/mmu/shadow_mmu.h b/arch/x86/kvm/mmu/shadow_mmu.h
> index 20c65a0ea52c..9952aa1e86cf 100644
> --- a/arch/x86/kvm/mmu/shadow_mmu.h
> +++ b/arch/x86/kvm/mmu/shadow_mmu.h
> @@ -99,7 +99,8 @@ void kvm_shadow_mmu_try_split_huge_pages(struct kvm *kvm,
>  void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
>                                      const struct kvm_memory_slot *slot);
>
> -unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc);
> +bool kvm_shadow_mmu_has_zapped_obsolete_pages(struct kvm *kvm);
> +unsigned long kvm_shadow_mmu_shrink_scan(struct kvm *kvm, int pages_to_free);
>
>  /* Exports from paging_tmpl.h */
>  gpa_t paging32_gva_to_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> --
> 2.39.0.314.g84b9a713c41-goog
>