Re: Lockdep warnings on kexec (virtio_blk, hrtimers)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Dec 13, 2024 at 6:05 PM Thomas Gleixner <tglx@xxxxxxxxxxxxx> wrote:
>
> On Fri, Dec 13 2024 at 14:07, David Woodhouse wrote:
> > On Fri, 2024-12-13 at 14:23 +0100, Thomas Gleixner wrote:
> >> That's only true for the case where the new kernel takes over.
> >>
> >> In the case KEXEC_JUMP=n and kexec_image->preserve_context == true, then
> >> it is supposed to align with suspend/resume and if you look at the code
> >> then it actually mimics suspend/resume in the most dilettanteish way.
> >
> > Did you mean KEXEC_JUMP=y there?
>
> Yes, of course.
>
> > I spent a while the other week trying to understand the case where
> > CONFIG_KEXEC_JUMP=n and kexec_image->preserve_context=true, and came to
> > the conclusion that it was a mirage. Userspace can't *actually* set the
> > KEXEC_PRESERVE_CONTEXT bit when setting up the image, if KEXEC_JUMP=n.
> >
> > The whole of the code path for that case is dead code. It's confusing
> > because as discussed elsewhere, we don't just #ifdef out the whole of
> > that dead code path, but only the bits which don't actually *compile*
> > (like references to restore_processor_state() etc.).
>
> Yes, I had to stare at it quite a while. :)
>
> >> It's a patently bad idea to clobber the kernel with kexec jump "fixes"
> >> instead of using the well tested and established suspend/resume
> >> machinery.
> >>
> >> All it takes is to:
> >>
> >>     1) disable the wakeup logic
> >>
> >>     2) provide a mechanism to invoke machine_kexec() instead of the
> >>        actual suspend mechanism.
> >>
> >> No?
> >
> > Agreed. The hacky proof of concept I posted earlier invoking
> > machine_kexec() instead of suspend_ops->enter() works fine. I'll look
> > at cleaning it up and making it not invoke all the ACPI hooks for
> > *actual* suspend to RAM, etc.
>
> Something like the below? It survived an hour of loop testing.
>
> > As I noted though, it doesn't address that linux-scsi report which was
> > a *real* kexec, not a kjump.
>
> I was not looking at that path at all.
>
> Thanks,
>
>         tglx
> ---
> --- a/include/linux/suspend.h
> +++ b/include/linux/suspend.h
> @@ -582,4 +582,7 @@ enum suspend_stat_step {
>  void dpm_save_failed_dev(const char *name);
>  void dpm_save_failed_step(enum suspend_stat_step step);
>
> +int kexec_suspend(void);
> +void kexec_machine_execute(void);
> +
>  #endif /* _LINUX_SUSPEND_H */
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -984,6 +984,12 @@ bool kexec_load_permitted(int kexec_imag
>         return true;
>  }
>
> +void kexec_machine_execute(void)
> +{
> +       kmsg_dump(KMSG_DUMP_SHUTDOWN);
> +       machine_kexec(kexec_image);
> +}
> +
>  /*
>   * Move into place and start executing a preloaded standalone
>   * executable.  If nothing was preloaded return an error.
> @@ -999,38 +1005,9 @@ int kernel_kexec(void)
>                 goto Unlock;
>         }
>
> -#ifdef CONFIG_KEXEC_JUMP
> -       if (kexec_image->preserve_context) {
> -               pm_prepare_console();
> -               error = freeze_processes();
> -               if (error) {
> -                       error = -EBUSY;
> -                       goto Restore_console;
> -               }
> -               suspend_console();
> -               error = dpm_suspend_start(PMSG_FREEZE);
> -               if (error)
> -                       goto Resume_console;
> -               /* At this point, dpm_suspend_start() has been called,
> -                * but *not* dpm_suspend_end(). We *must* call
> -                * dpm_suspend_end() now.  Otherwise, drivers for
> -                * some devices (e.g. interrupt controllers) become
> -                * desynchronized with the actual state of the
> -                * hardware at resume time, and evil weirdness ensues.
> -                */
> -               error = dpm_suspend_end(PMSG_FREEZE);
> -               if (error)
> -                       goto Resume_devices;
> -               error = suspend_disable_secondary_cpus();
> -               if (error)
> -                       goto Enable_cpus;
> -               local_irq_disable();
> -               error = syscore_suspend();
> -               if (error)
> -                       goto Enable_irqs;
> -       } else
> -#endif
> -       {
> +       if (IS_ENABLED(CONFIG_KEXEC_JUMP) && kexec_image->preserve_context) {
> +               error = kexec_suspend();
> +       } else {
>                 kexec_in_progress = true;
>                 kernel_restart_prepare("kexec reboot");
>                 migrate_to_reboot_cpu();
> @@ -1045,30 +1022,10 @@ int kernel_kexec(void)
>                 cpu_hotplug_enable();
>                 pr_notice("Starting new kernel\n");
>                 machine_shutdown();
> +               kexec_machine_execute();
>         }
>
> -       kmsg_dump(KMSG_DUMP_SHUTDOWN);
> -       machine_kexec(kexec_image);
> -
> -#ifdef CONFIG_KEXEC_JUMP
> -       if (kexec_image->preserve_context) {
> -               syscore_resume();
> - Enable_irqs:
> -               local_irq_enable();
> - Enable_cpus:
> -               suspend_enable_secondary_cpus();
> -               dpm_resume_start(PMSG_RESTORE);
> - Resume_devices:
> -               dpm_resume_end(PMSG_RESTORE);
> - Resume_console:
> -               resume_console();
> -               thaw_processes();
> - Restore_console:
> -               pm_restore_console();
> -       }
> -#endif
> -
> - Unlock:
> +Unlock:
>         kexec_unlock();
>         return error;
>  }
> --- a/kernel/power/suspend.c
> +++ b/kernel/power/suspend.c
> @@ -400,7 +400,7 @@ void __weak arch_suspend_enable_irqs(voi
>   *
>   * This function should be called after devices have been suspended.
>   */
> -static int suspend_enter(suspend_state_t state, bool *wakeup)
> +static int suspend_enter(suspend_state_t state, bool *wakeup, bool kexec_jump)
>  {
>         int error;
>
> @@ -445,15 +445,19 @@ static int suspend_enter(suspend_state_t
>
>         error = syscore_suspend();
>         if (!error) {
> -               *wakeup = pm_wakeup_pending();
> -               if (!(suspend_test(TEST_CORE) || *wakeup)) {
> -                       trace_suspend_resume(TPS("machine_suspend"),
> -                               state, true);
> -                       error = suspend_ops->enter(state);
> -                       trace_suspend_resume(TPS("machine_suspend"),
> -                               state, false);
> -               } else if (*wakeup) {
> -                       error = -EBUSY;
> +               if (IS_ENABLED(CONFIG_KEXEC_JUMP) && kexec_jump) {
> +                       kexec_machine_execute();
> +               } else {
> +                       *wakeup = pm_wakeup_pending();
> +                       if (!(suspend_test(TEST_CORE) || *wakeup)) {
> +                               trace_suspend_resume(TPS("machine_suspend"),
> +                                                    state, true);
> +                               error = suspend_ops->enter(state);
> +                               trace_suspend_resume(TPS("machine_suspend"),
> +                                                    state, false);
> +                       } else if (*wakeup) {
> +                               error = -EBUSY;
> +                       }
>                 }
>                 syscore_resume();
>         }
> @@ -485,7 +489,7 @@ static int suspend_enter(suspend_state_t
>   * suspend_devices_and_enter - Suspend devices and enter system sleep state.
>   * @state: System sleep state to enter.
>   */
> -int suspend_devices_and_enter(suspend_state_t state)
> +static int __suspend_devices_and_enter(suspend_state_t state, bool kexec_jump)
>  {
>         int error;
>         bool wakeup = false;
> @@ -514,7 +518,7 @@ int suspend_devices_and_enter(suspend_st
>                 goto Recover_platform;
>
>         do {
> -               error = suspend_enter(state, &wakeup);
> +               error = suspend_enter(state, &wakeup, kexec_jump);
>         } while (!error && !wakeup && platform_suspend_again(state));
>
>   Resume_devices:
> @@ -536,6 +540,15 @@ int suspend_devices_and_enter(suspend_st
>  }
>
>  /**
> + * suspend_devices_and_enter - Suspend devices and enter system sleep state.
> + * @state: System sleep state to enter.
> + */
> +int suspend_devices_and_enter(suspend_state_t state)
> +{
> +       return __suspend_devices_and_enter(state, false);
> +}
> +
> +/**
>   * suspend_finish - Clean up before finishing the suspend sequence.
>   *
>   * Call platform code to clean up, restart processes, and free the console that
> @@ -607,6 +620,21 @@ static int enter_state(suspend_state_t s
>         return error;
>  }
>
> +#ifdef CONFIG_KEXEC_JUMP
> +int kexec_suspend(void)
> +{
> +       int error;
> +
> +       ksys_sync_helper();
> +       error = freeze_processes();
> +       if (error)
> +               return error;
> +       error = __suspend_devices_and_enter(PM_SUSPEND_MEM, true);

Why do you want to hook this up to the suspend code?





[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux