On Fri, Dec 13 2024 at 14:07, David Woodhouse wrote: > On Fri, 2024-12-13 at 14:23 +0100, Thomas Gleixner wrote: >> That's only true for the case where the new kernel takes over. >> >> In the case KEXEC_JUMP=n and kexec_image->preserve_context == true, then >> it is supposed to align with suspend/resume and if you look at the code >> then it actually mimics suspend/resume in the most dilettanteish way. > > Did you mean KEXEC_JUMP=y there? Yes, of course. > I spent a while the other week trying to understand the case where > CONFIG_KEXEC_JUMP=n and kexec_image->preserve_context=true, and came to > the conclusion that it was a mirage. Userspace can't *actually* set the > KEXEC_PRESERVE_CONTEXT bit when setting up the image, if KEXEC_JUMP=n. > > The whole of the code path for that case is dead code. It's confusing > because as discussed elsewhere, we don't just #ifdef out the whole of > that dead code path, but only the bits which don't actually *compile* > (like references to restore_processor_state() etc.). Yes, I had to stare at it quite a while. :) >> It's a patently bad idea to clobber the kernel with kexec jump "fixes" >> instead of using the well tested and established suspend/resume >> machinery. >> >> All it takes is to: >> >> 1) disable the wakeup logic >> >> 2) provide a mechanism to invoke machine_kexec() instead of the >> actual suspend mechanism. >> >> No? > > Agreed. The hacky proof of concept I posted earlier invoking > machine_kexec() instead of suspend_ops->enter() works fine. I'll look > at cleaning it up and making it not invoke all the ACPI hooks for > *actual* suspend to RAM, etc. Something like the below? It survived an hour of loop testing. > As I noted though, it doesn't address that linux-scsi report which was > a *real* kexec, not a kjump. I was not looking at that path at all. Thanks, tglx --- --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -582,4 +582,7 @@ enum suspend_stat_step { void dpm_save_failed_dev(const char *name); void dpm_save_failed_step(enum suspend_stat_step step); +int kexec_suspend(void); +void kexec_machine_execute(void); + #endif /* _LINUX_SUSPEND_H */ --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -984,6 +984,12 @@ bool kexec_load_permitted(int kexec_imag return true; } +void kexec_machine_execute(void) +{ + kmsg_dump(KMSG_DUMP_SHUTDOWN); + machine_kexec(kexec_image); +} + /* * Move into place and start executing a preloaded standalone * executable. If nothing was preloaded return an error. @@ -999,38 +1005,9 @@ int kernel_kexec(void) goto Unlock; } -#ifdef CONFIG_KEXEC_JUMP - if (kexec_image->preserve_context) { - pm_prepare_console(); - error = freeze_processes(); - if (error) { - error = -EBUSY; - goto Restore_console; - } - suspend_console(); - error = dpm_suspend_start(PMSG_FREEZE); - if (error) - goto Resume_console; - /* At this point, dpm_suspend_start() has been called, - * but *not* dpm_suspend_end(). We *must* call - * dpm_suspend_end() now. Otherwise, drivers for - * some devices (e.g. interrupt controllers) become - * desynchronized with the actual state of the - * hardware at resume time, and evil weirdness ensues. - */ - error = dpm_suspend_end(PMSG_FREEZE); - if (error) - goto Resume_devices; - error = suspend_disable_secondary_cpus(); - if (error) - goto Enable_cpus; - local_irq_disable(); - error = syscore_suspend(); - if (error) - goto Enable_irqs; - } else -#endif - { + if (IS_ENABLED(CONFIG_KEXEC_JUMP) && kexec_image->preserve_context) { + error = kexec_suspend(); + } else { kexec_in_progress = true; kernel_restart_prepare("kexec reboot"); migrate_to_reboot_cpu(); @@ -1045,30 +1022,10 @@ int kernel_kexec(void) cpu_hotplug_enable(); pr_notice("Starting new kernel\n"); machine_shutdown(); + kexec_machine_execute(); } - kmsg_dump(KMSG_DUMP_SHUTDOWN); - machine_kexec(kexec_image); - -#ifdef CONFIG_KEXEC_JUMP - if (kexec_image->preserve_context) { - syscore_resume(); - Enable_irqs: - local_irq_enable(); - Enable_cpus: - suspend_enable_secondary_cpus(); - dpm_resume_start(PMSG_RESTORE); - Resume_devices: - dpm_resume_end(PMSG_RESTORE); - Resume_console: - resume_console(); - thaw_processes(); - Restore_console: - pm_restore_console(); - } -#endif - - Unlock: +Unlock: kexec_unlock(); return error; } --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -400,7 +400,7 @@ void __weak arch_suspend_enable_irqs(voi * * This function should be called after devices have been suspended. */ -static int suspend_enter(suspend_state_t state, bool *wakeup) +static int suspend_enter(suspend_state_t state, bool *wakeup, bool kexec_jump) { int error; @@ -445,15 +445,19 @@ static int suspend_enter(suspend_state_t error = syscore_suspend(); if (!error) { - *wakeup = pm_wakeup_pending(); - if (!(suspend_test(TEST_CORE) || *wakeup)) { - trace_suspend_resume(TPS("machine_suspend"), - state, true); - error = suspend_ops->enter(state); - trace_suspend_resume(TPS("machine_suspend"), - state, false); - } else if (*wakeup) { - error = -EBUSY; + if (IS_ENABLED(CONFIG_KEXEC_JUMP) && kexec_jump) { + kexec_machine_execute(); + } else { + *wakeup = pm_wakeup_pending(); + if (!(suspend_test(TEST_CORE) || *wakeup)) { + trace_suspend_resume(TPS("machine_suspend"), + state, true); + error = suspend_ops->enter(state); + trace_suspend_resume(TPS("machine_suspend"), + state, false); + } else if (*wakeup) { + error = -EBUSY; + } } syscore_resume(); } @@ -485,7 +489,7 @@ static int suspend_enter(suspend_state_t * suspend_devices_and_enter - Suspend devices and enter system sleep state. * @state: System sleep state to enter. */ -int suspend_devices_and_enter(suspend_state_t state) +static int __suspend_devices_and_enter(suspend_state_t state, bool kexec_jump) { int error; bool wakeup = false; @@ -514,7 +518,7 @@ int suspend_devices_and_enter(suspend_st goto Recover_platform; do { - error = suspend_enter(state, &wakeup); + error = suspend_enter(state, &wakeup, kexec_jump); } while (!error && !wakeup && platform_suspend_again(state)); Resume_devices: @@ -536,6 +540,15 @@ int suspend_devices_and_enter(suspend_st } /** + * suspend_devices_and_enter - Suspend devices and enter system sleep state. + * @state: System sleep state to enter. + */ +int suspend_devices_and_enter(suspend_state_t state) +{ + return __suspend_devices_and_enter(state, false); +} + +/** * suspend_finish - Clean up before finishing the suspend sequence. * * Call platform code to clean up, restart processes, and free the console that @@ -607,6 +620,21 @@ static int enter_state(suspend_state_t s return error; } +#ifdef CONFIG_KEXEC_JUMP +int kexec_suspend(void) +{ + int error; + + ksys_sync_helper(); + error = freeze_processes(); + if (error) + return error; + error = __suspend_devices_and_enter(PM_SUSPEND_MEM, true); + thaw_processes(); + return error; +} +#endif + /** * pm_suspend - Externally visible function for suspending the system. * @state: System sleep state to enter.