On Wed, Dec 21, 2022 at 2:24 PM Ben Gardon <bgardon@xxxxxxxxxx> wrote:
>
> The MMU shrinker currently only operates on the Shadow MMU, but having
> the entire implemenatation in shadow_mmu.c is awkward since much of the
> function isn't Shadow MMU specific. There has also been talk of changing the
> target of the shrinker to the MMU caches rather than already allocated page
> tables. As a result, it makes sense to move some of the implementation back
> to mmu.c.
>
> No functional change intended.
>
> Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
> ---
>  arch/x86/kvm/mmu/mmu.c        | 43 ++++++++++++++++++++++++
>  arch/x86/kvm/mmu/shadow_mmu.c | 62 ++++++++---------------------------
>  arch/x86/kvm/mmu/shadow_mmu.h |  3 +-
>  3 files changed, 58 insertions(+), 50 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index dd97e346c786..4c45a5b63356 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3147,6 +3147,49 @@ static unsigned long mmu_shrink_count(struct shrinker *shrink,
>          return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
>  }
>
> +unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> +        struct kvm *kvm;
> +        int nr_to_scan = sc->nr_to_scan;
> +        unsigned long freed = 0;
> +
> +        mutex_lock(&kvm_lock);
> +
> +        list_for_each_entry(kvm, &vm_list, vm_list) {
> +                /*
> +                 * Never scan more than sc->nr_to_scan VM instances.
> +                 * Will not hit this condition practically since we do not try
> +                 * to shrink more than one VM and it is very unlikely to see
> +                 * !n_used_mmu_pages so many times.
> +                 */
> +                if (!nr_to_scan--)
> +                        break;
> +
> +                /*
> +                 * n_used_mmu_pages is accessed without holding kvm->mmu_lock
> +                 * here. We may skip a VM instance errorneosly, but we do not
> +                 * want to shrink a VM that only started to populate its MMU
> +                 * anyway.
> +                 */
> +                if (!kvm->arch.n_used_mmu_pages &&
> +                    !kvm_shadow_mmu_has_zapped_obsolete_pages(kvm))
> +                        continue;
> +
> +                freed = kvm_shadow_mmu_shrink_scan(kvm, sc->nr_to_scan);
> +
> +                /*
> +                 * unfair on small ones
> +                 * per-vm shrinkers cry out
> +                 * sadness comes quickly
> +                 */
> +                list_move_tail(&kvm->vm_list, &vm_list);
> +                break;
> +        }
> +
> +        mutex_unlock(&kvm_lock);
> +        return freed;
> +}
> +
>  static struct shrinker mmu_shrinker = {
>          .count_objects = mmu_shrink_count,
>          .scan_objects = mmu_shrink_scan,
> diff --git a/arch/x86/kvm/mmu/shadow_mmu.c b/arch/x86/kvm/mmu/shadow_mmu.c
> index 090b4788f7de..1259c4a3b140 100644
> --- a/arch/x86/kvm/mmu/shadow_mmu.c
> +++ b/arch/x86/kvm/mmu/shadow_mmu.c
> @@ -3147,7 +3147,7 @@ void kvm_zap_obsolete_pages(struct kvm *kvm)
>          kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
>  }
>
> -static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
> +bool kvm_shadow_mmu_has_zapped_obsolete_pages(struct kvm *kvm)

Function renaming and removing static should be two separate commits.
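E.g. (just a sketch of the split, not the exact diff), the commit that moves
the shrinker into mmu.c could limit itself to dropping static and exporting
the helper under its current name:

-static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
+bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)

plus the matching declaration in shadow_mmu.h:

+bool kvm_has_zapped_obsolete_pages(struct kvm *kvm);

and then a follow-up commit could do the rename to
kvm_shadow_mmu_has_zapped_obsolete_pages(), updating the callers in mmu.c and
shadow_mmu.c in the same patch.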
> {
>          return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
>  }
> @@ -3416,60 +3416,24 @@ void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
>          kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
>  }
>
> -unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
> +unsigned long kvm_shadow_mmu_shrink_scan(struct kvm *kvm, int pages_to_free)
>  {
> -        struct kvm *kvm;
> -        int nr_to_scan = sc->nr_to_scan;
>          unsigned long freed = 0;
> +        int idx;
>
> -        mutex_lock(&kvm_lock);
> -
> -        list_for_each_entry(kvm, &vm_list, vm_list) {
> -                int idx;
> -                LIST_HEAD(invalid_list);
> -
> -                /*
> -                 * Never scan more than sc->nr_to_scan VM instances.
> -                 * Will not hit this condition practically since we do not try
> -                 * to shrink more than one VM and it is very unlikely to see
> -                 * !n_used_mmu_pages so many times.
> -                 */
> -                if (!nr_to_scan--)
> -                        break;
> -                /*
> -                 * n_used_mmu_pages is accessed without holding kvm->mmu_lock
> -                 * here. We may skip a VM instance errorneosly, but we do not
> -                 * want to shrink a VM that only started to populate its MMU
> -                 * anyway.
> -                 */
> -                if (!kvm->arch.n_used_mmu_pages &&
> -                    !kvm_has_zapped_obsolete_pages(kvm))
> -                        continue;
> -
> -                idx = srcu_read_lock(&kvm->srcu);
> -                write_lock(&kvm->mmu_lock);
> -
> -                if (kvm_has_zapped_obsolete_pages(kvm)) {
> -                        kvm_mmu_commit_zap_page(kvm,
> -                                                &kvm->arch.zapped_obsolete_pages);
> -                        goto unlock;
> -                }
> +        idx = srcu_read_lock(&kvm->srcu);
> +        write_lock(&kvm->mmu_lock);
>
> -                freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan);
> +        if (kvm_shadow_mmu_has_zapped_obsolete_pages(kvm)) {
> +                kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
> +                goto out;
> +        }
>
> -unlock:
> -                write_unlock(&kvm->mmu_lock);
> -                srcu_read_unlock(&kvm->srcu, idx);
> +        freed = kvm_mmu_zap_oldest_mmu_pages(kvm, pages_to_free);
>
> -                /*
> -                 * unfair on small ones
> -                 * per-vm shrinkers cry out
> -                 * sadness comes quickly
> -                 */
> -                list_move_tail(&kvm->vm_list, &vm_list);
> -                break;
> -        }
> +out:
> +        write_unlock(&kvm->mmu_lock);
> +        srcu_read_unlock(&kvm->srcu, idx);
>
> -        mutex_unlock(&kvm_lock);
>          return freed;
>  }
> diff --git a/arch/x86/kvm/mmu/shadow_mmu.h b/arch/x86/kvm/mmu/shadow_mmu.h
> index 20c65a0ea52c..9952aa1e86cf 100644
> --- a/arch/x86/kvm/mmu/shadow_mmu.h
> +++ b/arch/x86/kvm/mmu/shadow_mmu.h
> @@ -99,7 +99,8 @@ void kvm_shadow_mmu_try_split_huge_pages(struct kvm *kvm,
>  void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
>                                      const struct kvm_memory_slot *slot);
>
> -unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc);
> +bool kvm_shadow_mmu_has_zapped_obsolete_pages(struct kvm *kvm);
> +unsigned long kvm_shadow_mmu_shrink_scan(struct kvm *kvm, int pages_to_free);
>
>  /* Exports from paging_tmpl.h */
>  gpa_t paging32_gva_to_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> --
> 2.39.0.314.g84b9a713c41-goog
>