Re: Lockdep warnings on kexec (virtio_blk, hrtimers)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Dec 13 2024 at 14:07, David Woodhouse wrote:
> On Fri, 2024-12-13 at 14:23 +0100, Thomas Gleixner wrote:
>> That's only true for the case where the new kernel takes over.
>> 
>> In the case KEXEC_JUMP=n and kexec_image->preserve_context == true, then
>> it is supposed to align with suspend/resume and if you look at the code
>> then it actually mimics suspend/resume in the most dilettanteish way.
>
> Did you mean KEXEC_JUMP=y there?

Yes, of course.

> I spent a while the other week trying to understand the case where
> CONFIG_KEXEC_JUMP=n and kexec_image->preserve_context=true, and came to
> the conclusion that it was a mirage. Userspace can't *actually* set the
> KEXEC_PRESERVE_CONTEXT bit when setting up the image, if KEXEC_JUMP=n.
>
> The whole of the code path for that case is dead code. It's confusing
> because as discussed elsewhere, we don't just #ifdef out the whole of
> that dead code path, but only the bits which don't actually *compile*
> (like references to restore_processor_state() etc.).

Yes, I had to stare at it quite a while. :)

>> It's a patently bad idea to clobber the kernel with kexec jump "fixes"
>> instead of using the well tested and established suspend/resume
>> machinery.
>> 
>> All it takes is to:
>> 
>>     1) disable the wakeup logic
>> 
>>     2) provide a mechanism to invoke machine_kexec() instead of the
>>        actual suspend mechanism.
>> 
>> No?
>
> Agreed. The hacky proof of concept I posted earlier invoking
> machine_kexec() instead of suspend_ops->enter() works fine. I'll look
> at cleaning it up and making it not invoke all the ACPI hooks for
> *actual* suspend to RAM, etc.

Something like the below? It survived an hour of loop testing.

> As I noted though, it doesn't address that linux-scsi report which was
> a *real* kexec, not a kjump.

I was not looking at that path at all.

Thanks,

        tglx
---
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -582,4 +582,7 @@ enum suspend_stat_step {
 void dpm_save_failed_dev(const char *name);
 void dpm_save_failed_step(enum suspend_stat_step step);
 
+int kexec_suspend(void);
+void kexec_machine_execute(void);
+
 #endif /* _LINUX_SUSPEND_H */
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -984,6 +984,12 @@ bool kexec_load_permitted(int kexec_imag
 	return true;
 }
 
+void kexec_machine_execute(void)
+{
+	kmsg_dump(KMSG_DUMP_SHUTDOWN);
+	machine_kexec(kexec_image);
+}
+
 /*
  * Move into place and start executing a preloaded standalone
  * executable.  If nothing was preloaded return an error.
@@ -999,38 +1005,9 @@ int kernel_kexec(void)
 		goto Unlock;
 	}
 
-#ifdef CONFIG_KEXEC_JUMP
-	if (kexec_image->preserve_context) {
-		pm_prepare_console();
-		error = freeze_processes();
-		if (error) {
-			error = -EBUSY;
-			goto Restore_console;
-		}
-		suspend_console();
-		error = dpm_suspend_start(PMSG_FREEZE);
-		if (error)
-			goto Resume_console;
-		/* At this point, dpm_suspend_start() has been called,
-		 * but *not* dpm_suspend_end(). We *must* call
-		 * dpm_suspend_end() now.  Otherwise, drivers for
-		 * some devices (e.g. interrupt controllers) become
-		 * desynchronized with the actual state of the
-		 * hardware at resume time, and evil weirdness ensues.
-		 */
-		error = dpm_suspend_end(PMSG_FREEZE);
-		if (error)
-			goto Resume_devices;
-		error = suspend_disable_secondary_cpus();
-		if (error)
-			goto Enable_cpus;
-		local_irq_disable();
-		error = syscore_suspend();
-		if (error)
-			goto Enable_irqs;
-	} else
-#endif
-	{
+	if (IS_ENABLED(CONFIG_KEXEC_JUMP) && kexec_image->preserve_context) {
+		error = kexec_suspend();
+	} else {
 		kexec_in_progress = true;
 		kernel_restart_prepare("kexec reboot");
 		migrate_to_reboot_cpu();
@@ -1045,30 +1022,10 @@ int kernel_kexec(void)
 		cpu_hotplug_enable();
 		pr_notice("Starting new kernel\n");
 		machine_shutdown();
+		kexec_machine_execute();
 	}
 
-	kmsg_dump(KMSG_DUMP_SHUTDOWN);
-	machine_kexec(kexec_image);
-
-#ifdef CONFIG_KEXEC_JUMP
-	if (kexec_image->preserve_context) {
-		syscore_resume();
- Enable_irqs:
-		local_irq_enable();
- Enable_cpus:
-		suspend_enable_secondary_cpus();
-		dpm_resume_start(PMSG_RESTORE);
- Resume_devices:
-		dpm_resume_end(PMSG_RESTORE);
- Resume_console:
-		resume_console();
-		thaw_processes();
- Restore_console:
-		pm_restore_console();
-	}
-#endif
-
- Unlock:
+Unlock:
 	kexec_unlock();
 	return error;
 }
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -400,7 +400,7 @@ void __weak arch_suspend_enable_irqs(voi
  *
  * This function should be called after devices have been suspended.
  */
-static int suspend_enter(suspend_state_t state, bool *wakeup)
+static int suspend_enter(suspend_state_t state, bool *wakeup, bool kexec_jump)
 {
 	int error;
 
@@ -445,15 +445,19 @@ static int suspend_enter(suspend_state_t
 
 	error = syscore_suspend();
 	if (!error) {
-		*wakeup = pm_wakeup_pending();
-		if (!(suspend_test(TEST_CORE) || *wakeup)) {
-			trace_suspend_resume(TPS("machine_suspend"),
-				state, true);
-			error = suspend_ops->enter(state);
-			trace_suspend_resume(TPS("machine_suspend"),
-				state, false);
-		} else if (*wakeup) {
-			error = -EBUSY;
+		if (IS_ENABLED(CONFIG_KEXEC_JUMP) && kexec_jump) {
+			kexec_machine_execute();
+		} else {
+			*wakeup = pm_wakeup_pending();
+			if (!(suspend_test(TEST_CORE) || *wakeup)) {
+				trace_suspend_resume(TPS("machine_suspend"),
+						     state, true);
+				error = suspend_ops->enter(state);
+				trace_suspend_resume(TPS("machine_suspend"),
+						     state, false);
+			} else if (*wakeup) {
+				error = -EBUSY;
+			}
 		}
 		syscore_resume();
 	}
@@ -485,7 +489,7 @@ static int suspend_enter(suspend_state_t
  * suspend_devices_and_enter - Suspend devices and enter system sleep state.
  * @state: System sleep state to enter.
  */
-int suspend_devices_and_enter(suspend_state_t state)
+static int __suspend_devices_and_enter(suspend_state_t state, bool kexec_jump)
 {
 	int error;
 	bool wakeup = false;
@@ -514,7 +518,7 @@ int suspend_devices_and_enter(suspend_st
 		goto Recover_platform;
 
 	do {
-		error = suspend_enter(state, &wakeup);
+		error = suspend_enter(state, &wakeup, kexec_jump);
 	} while (!error && !wakeup && platform_suspend_again(state));
 
  Resume_devices:
@@ -536,6 +540,15 @@ int suspend_devices_and_enter(suspend_st
 }
 
 /**
+ * suspend_devices_and_enter - Suspend devices and enter system sleep state.
+ * @state: System sleep state to enter.
+ */
+int suspend_devices_and_enter(suspend_state_t state)
+{
+	return __suspend_devices_and_enter(state, false);
+}
+
+/**
  * suspend_finish - Clean up before finishing the suspend sequence.
  *
  * Call platform code to clean up, restart processes, and free the console that
@@ -607,6 +620,21 @@ static int enter_state(suspend_state_t s
 	return error;
 }
 
+#ifdef CONFIG_KEXEC_JUMP
+int kexec_suspend(void)
+{
+	int error;
+
+	ksys_sync_helper();
+	error = freeze_processes();
+	if (error)
+		return error;
+	error = __suspend_devices_and_enter(PM_SUSPEND_MEM, true);
+	thaw_processes();
+	return error;
+}
+#endif
+
 /**
  * pm_suspend - Externally visible function for suspending the system.
  * @state: System sleep state to enter.







[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux