On Fri, Dec 13, 2024 at 6:05 PM Thomas Gleixner <tglx@xxxxxxxxxxxxx> wrote: > > On Fri, Dec 13 2024 at 14:07, David Woodhouse wrote: > > On Fri, 2024-12-13 at 14:23 +0100, Thomas Gleixner wrote: > >> That's only true for the case where the new kernel takes over. > >> > >> In the case KEXEC_JUMP=n and kexec_image->preserve_context == true, then > >> it is supposed to align with suspend/resume and if you look at the code > >> then it actually mimics suspend/resume in the most dilettanteish way. > > > > Did you mean KEXEC_JUMP=y there? > > Yes, of course. > > > I spent a while the other week trying to understand the case where > > CONFIG_KEXEC_JUMP=n and kexec_image->preserve_context=true, and came to > > the conclusion that it was a mirage. Userspace can't *actually* set the > > KEXEC_PRESERVE_CONTEXT bit when setting up the image, if KEXEC_JUMP=n. > > > > The whole of the code path for that case is dead code. It's confusing > > because as discussed elsewhere, we don't just #ifdef out the whole of > > that dead code path, but only the bits which don't actually *compile* > > (like references to restore_processor_state() etc.). > > Yes, I had to stare at it quite a while. :) > > >> It's a patently bad idea to clobber the kernel with kexec jump "fixes" > >> instead of using the well tested and established suspend/resume > >> machinery. > >> > >> All it takes is to: > >> > >> 1) disable the wakeup logic > >> > >> 2) provide a mechanism to invoke machine_kexec() instead of the > >> actual suspend mechanism. > >> > >> No? > > > > Agreed. The hacky proof of concept I posted earlier invoking > > machine_kexec() instead of suspend_ops->enter() works fine. I'll look > > at cleaning it up and making it not invoke all the ACPI hooks for > > *actual* suspend to RAM, etc. > > Something like the below? It survived an hour of loop testing. > > > As I noted though, it doesn't address that linux-scsi report which was > > a *real* kexec, not a kjump. > > I was not looking at that path at all. > > Thanks, > > tglx > --- > --- a/include/linux/suspend.h > +++ b/include/linux/suspend.h > @@ -582,4 +582,7 @@ enum suspend_stat_step { > void dpm_save_failed_dev(const char *name); > void dpm_save_failed_step(enum suspend_stat_step step); > > +int kexec_suspend(void); > +void kexec_machine_execute(void); > + > #endif /* _LINUX_SUSPEND_H */ > --- a/kernel/kexec_core.c > +++ b/kernel/kexec_core.c > @@ -984,6 +984,12 @@ bool kexec_load_permitted(int kexec_imag > return true; > } > > +void kexec_machine_execute(void) > +{ > + kmsg_dump(KMSG_DUMP_SHUTDOWN); > + machine_kexec(kexec_image); > +} > + > /* > * Move into place and start executing a preloaded standalone > * executable. If nothing was preloaded return an error. > @@ -999,38 +1005,9 @@ int kernel_kexec(void) > goto Unlock; > } > > -#ifdef CONFIG_KEXEC_JUMP > - if (kexec_image->preserve_context) { > - pm_prepare_console(); > - error = freeze_processes(); > - if (error) { > - error = -EBUSY; > - goto Restore_console; > - } > - suspend_console(); > - error = dpm_suspend_start(PMSG_FREEZE); > - if (error) > - goto Resume_console; > - /* At this point, dpm_suspend_start() has been called, > - * but *not* dpm_suspend_end(). We *must* call > - * dpm_suspend_end() now. Otherwise, drivers for > - * some devices (e.g. interrupt controllers) become > - * desynchronized with the actual state of the > - * hardware at resume time, and evil weirdness ensues. > - */ > - error = dpm_suspend_end(PMSG_FREEZE); > - if (error) > - goto Resume_devices; > - error = suspend_disable_secondary_cpus(); > - if (error) > - goto Enable_cpus; > - local_irq_disable(); > - error = syscore_suspend(); > - if (error) > - goto Enable_irqs; > - } else > -#endif > - { > + if (IS_ENABLED(CONFIG_KEXEC_JUMP) && kexec_image->preserve_context) { > + error = kexec_suspend(); > + } else { > kexec_in_progress = true; > kernel_restart_prepare("kexec reboot"); > migrate_to_reboot_cpu(); > @@ -1045,30 +1022,10 @@ int kernel_kexec(void) > cpu_hotplug_enable(); > pr_notice("Starting new kernel\n"); > machine_shutdown(); > + kexec_machine_execute(); > } > > - kmsg_dump(KMSG_DUMP_SHUTDOWN); > - machine_kexec(kexec_image); > - > -#ifdef CONFIG_KEXEC_JUMP > - if (kexec_image->preserve_context) { > - syscore_resume(); > - Enable_irqs: > - local_irq_enable(); > - Enable_cpus: > - suspend_enable_secondary_cpus(); > - dpm_resume_start(PMSG_RESTORE); > - Resume_devices: > - dpm_resume_end(PMSG_RESTORE); > - Resume_console: > - resume_console(); > - thaw_processes(); > - Restore_console: > - pm_restore_console(); > - } > -#endif > - > - Unlock: > +Unlock: > kexec_unlock(); > return error; > } > --- a/kernel/power/suspend.c > +++ b/kernel/power/suspend.c > @@ -400,7 +400,7 @@ void __weak arch_suspend_enable_irqs(voi > * > * This function should be called after devices have been suspended. > */ > -static int suspend_enter(suspend_state_t state, bool *wakeup) > +static int suspend_enter(suspend_state_t state, bool *wakeup, bool kexec_jump) > { > int error; > > @@ -445,15 +445,19 @@ static int suspend_enter(suspend_state_t > > error = syscore_suspend(); > if (!error) { > - *wakeup = pm_wakeup_pending(); > - if (!(suspend_test(TEST_CORE) || *wakeup)) { > - trace_suspend_resume(TPS("machine_suspend"), > - state, true); > - error = suspend_ops->enter(state); > - trace_suspend_resume(TPS("machine_suspend"), > - state, false); > - } else if (*wakeup) { > - error = -EBUSY; > + if (IS_ENABLED(CONFIG_KEXEC_JUMP) && kexec_jump) { > + kexec_machine_execute(); > + } else { > + *wakeup = pm_wakeup_pending(); > + if (!(suspend_test(TEST_CORE) || *wakeup)) { > + trace_suspend_resume(TPS("machine_suspend"), > + state, true); > + error = suspend_ops->enter(state); > + trace_suspend_resume(TPS("machine_suspend"), > + state, false); > + } else if (*wakeup) { > + error = -EBUSY; > + } > } > syscore_resume(); > } > @@ -485,7 +489,7 @@ static int suspend_enter(suspend_state_t > * suspend_devices_and_enter - Suspend devices and enter system sleep state. > * @state: System sleep state to enter. > */ > -int suspend_devices_and_enter(suspend_state_t state) > +static int __suspend_devices_and_enter(suspend_state_t state, bool kexec_jump) > { > int error; > bool wakeup = false; > @@ -514,7 +518,7 @@ int suspend_devices_and_enter(suspend_st > goto Recover_platform; > > do { > - error = suspend_enter(state, &wakeup); > + error = suspend_enter(state, &wakeup, kexec_jump); > } while (!error && !wakeup && platform_suspend_again(state)); > > Resume_devices: > @@ -536,6 +540,15 @@ int suspend_devices_and_enter(suspend_st > } > > /** > + * suspend_devices_and_enter - Suspend devices and enter system sleep state. > + * @state: System sleep state to enter. > + */ > +int suspend_devices_and_enter(suspend_state_t state) > +{ > + return __suspend_devices_and_enter(state, false); > +} > + > +/** > * suspend_finish - Clean up before finishing the suspend sequence. > * > * Call platform code to clean up, restart processes, and free the console that > @@ -607,6 +620,21 @@ static int enter_state(suspend_state_t s > return error; > } > > +#ifdef CONFIG_KEXEC_JUMP > +int kexec_suspend(void) > +{ > + int error; > + > + ksys_sync_helper(); > + error = freeze_processes(); > + if (error) > + return error; > + error = __suspend_devices_and_enter(PM_SUSPEND_MEM, true); Why do you want to hook this up to the suspend code?