Re: [PATCH] KVM: x86/mmu: Don't create kvm-nx-lpage-re kthread if not itlb_multihit

zhuangel570 <zhuangel570@xxxxxxxxx> · Sat, 6 May 2023 15:12:55 +0800

The "never" parameter works for environments without the ITLB multihit issue.
But for vulnerable environments, should we prohibit users from turning off the
software mitigation?

As for the nx_huge_page_recovery_thread worker thread, it is an optimization of
the software mitigation and may not be needed in all cases.
For example, on a vulnerable machine the software mitigation needs to be
enabled, but the worker thread may not be needed when the VM determines that
huge pages are not in use (I'm not sure about this).

Do you think it is possible to introduce a new parameter to disable worker
threads?

On Sat, May 6, 2023 at 1:44 AM Sean Christopherson <seanjc@xxxxxxxxxx> wrote:
>
> On Fri, May 05, 2023, zhuangel570 wrote:
> > FYI, this is our test scenario, simulating a FaaS workload: every VM is
> > assigned 0.1 core, and lots of VMs are running in the background (such as
> > 800 VMs on a machine with 80 cores). We then burst-create 10 VMs and see
> > 100ms+ latency in creating "kvm-nx-lpage-recovery".
> >
> > On Tue, May 2, 2023 at 10:20 AM Robert Hoo <robert.hoo.linux@xxxxxxxxx> wrote:
> > >
> > > On 3/23/2023 3:18 PM, lirongqing@xxxxxxxxx wrote:
> > > > From: Li RongQing <lirongqing@xxxxxxxxx>
> > > >
> > > > If the CPU does not have the X86_BUG_ITLB_MULTIHIT bug, the
> > > > kvm-nx-lpage-re kthread does not need to be created.
> > >
> > > (directed by Sean from
> > > https://lore.kernel.org/kvm/ZE%2FR1%2FhvbuWmD8mw@xxxxxxxxxx/ here.)
> > >
> > > No, I think it should be tied to the "nx_huge_pages" value rather than
> > > directly/partially tied to boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT).
>
> Lightly tested.  This is what I'm thinking for a "never" param.  Unless someone
> has an alternative idea, I'll post a formal patch after more testing.
>
> ---
>  arch/x86/kvm/mmu/mmu.c | 41 ++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 36 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index c8961f45e3b1..14713c050196 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -58,6 +58,8 @@
>
>  extern bool itlb_multihit_kvm_mitigation;
>
> +static bool nx_hugepage_mitigation_hard_disabled;
> +
>  int __read_mostly nx_huge_pages = -1;
>  static uint __read_mostly nx_huge_pages_recovery_period_ms;
>  #ifdef CONFIG_PREEMPT_RT
> @@ -67,12 +69,13 @@ static uint __read_mostly nx_huge_pages_recovery_ratio = 0;
>  static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
>  #endif
>
> +static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp);
>  static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
>  static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp);
>
>  static const struct kernel_param_ops nx_huge_pages_ops = {
>         .set = set_nx_huge_pages,
> -       .get = param_get_bool,
> +       .get = get_nx_huge_pages,
>  };
>
>  static const struct kernel_param_ops nx_huge_pages_recovery_param_ops = {
> @@ -6844,6 +6847,14 @@ static void mmu_destroy_caches(void)
>         kmem_cache_destroy(mmu_page_header_cache);
>  }
>
> +static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp)
> +{
> +       if (nx_hugepage_mitigation_hard_disabled)
> +               return sprintf(buffer, "never\n");
> +
> +       return param_get_bool(buffer, kp);
> +}
> +
>  static bool get_nx_auto_mode(void)
>  {
>         /* Return true when CPU has the bug, and mitigations are ON */
> @@ -6860,15 +6871,29 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
>         bool old_val = nx_huge_pages;
>         bool new_val;
>
> +       if (nx_hugepage_mitigation_hard_disabled)
> +               return -EPERM;
> +
>         /* In "auto" mode deploy workaround only if CPU has the bug. */
> -       if (sysfs_streq(val, "off"))
> +       if (sysfs_streq(val, "off")) {
>                 new_val = 0;
> -       else if (sysfs_streq(val, "force"))
> +       } else if (sysfs_streq(val, "force")) {
>                 new_val = 1;
> -       else if (sysfs_streq(val, "auto"))
> +       } else if (sysfs_streq(val, "auto")) {
>                 new_val = get_nx_auto_mode();
> -       else if (kstrtobool(val, &new_val) < 0)
> +       } if (sysfs_streq(val, "never")) {
> +               new_val = 0;
> +
> +               mutex_lock(&kvm_lock);
> +               if (!list_empty(&vm_list)) {
> +                       mutex_unlock(&kvm_lock);
> +                       return -EBUSY;
> +               }
> +               nx_hugepage_mitigation_hard_disabled = true;
> +               mutex_unlock(&kvm_lock);
> +       } else if (kstrtobool(val, &new_val) < 0) {
>                 return -EINVAL;
> +       }
>
>         __set_nx_huge_pages(new_val);
>
> @@ -7006,6 +7031,9 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
>         uint old_period, new_period;
>         int err;
>
> +       if (nx_hugepage_mitigation_hard_disabled)
> +               return -EPERM;
> +
>         was_recovery_enabled = calc_nx_huge_pages_recovery_period(&old_period);
>
>         err = param_set_uint(val, kp);
> @@ -7161,6 +7189,9 @@ int kvm_mmu_post_init_vm(struct kvm *kvm)
>  {
>         int err;
>
> +       if (nx_hugepage_mitigation_hard_disabled)
> +               return 0;
> +
>         err = kvm_vm_create_worker_thread(kvm, kvm_nx_huge_page_recovery_worker, 0,
>                                           "kvm-nx-lpage-recovery",
>                                           &kvm->arch.nx_huge_page_recovery_thread);
>
> base-commit: b3c98052d46948a8d65d2778c7f306ff38366aac
> --
>

-- 
——————————
   zhuangel570
——————————