Re: Regression in 5.16-rc1 with suspend to idle

"Rafael J. Wysocki" <rafael@xxxxxxxxxx> · Mon, 21 Feb 2022 17:05:36 +0100

On Sat, Feb 19, 2022 at 4:01 AM Limonciello, Mario
<Mario.Limonciello@xxxxxxx> wrote:
>
> [Public]
>
> > it:
> > > >
> > > > diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
> > > > index 04ea92cbd9cf..f5bf46782043 100644
> > > > --- a/drivers/base/power/main.c
> > > > +++ b/drivers/base/power/main.c
> > > > @@ -32,6 +32,7 @@
> > > >  #include <linux/suspend.h>
> > > >  #include <trace/events/power.h>
> > > >  #include <linux/cpufreq.h>
> > > > +#include <linux/cpuidle.h>
> > > >  #include <linux/devfreq.h>
> > > >  #include <linux/timer.h>
> > > >
> > > > @@ -1350,6 +1351,8 @@ int dpm_suspend_noirq(pm_message_t state)
> > > >  {
> > > >         int ret;
> > > >
> > > > +       cpuidle_pause();
> > >
> > > Can you replace this with wake_up_all_idle_cpus() and remove the
> > > cpuidle_resume()/cpuidle_pause() calls from s2idle_enter() and see
> > > what happens?
> > >
> > > > +
> > > >         device_wakeup_arm_wake_irqs();
> > > >         suspend_device_irqs();
> > > >
> > > > diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
> > > > index 6fcdee7e87a5..1708a643ba5d 100644
> > > > --- a/kernel/power/suspend.c
> > > > +++ b/kernel/power/suspend.c
> > > > @@ -97,6 +97,7 @@ static void s2idle_enter(void)
> > > >         raw_spin_unlock_irq(&s2idle_lock);
> > > >
> > > >         cpus_read_lock();
> > > > +       cpuidle_resume();
> > > >
> > > >         /* Push all the CPUs into the idle loop. */
> > > >         wake_up_all_idle_cpus();
> > > > @@ -104,6 +105,7 @@ static void s2idle_enter(void)
> > > >         swait_event_exclusive(s2idle_wait_head,
> > > >                     s2idle_state == S2IDLE_STATE_WAKE);
> > > >
> > > > +       cpuidle_pause();
> > > >         cpus_read_unlock();
> > > >
> > > >         raw_spin_lock_irq(&s2idle_lock);
> > > >
> > > >
> > > > * Removing the cpuidle_pause call from s2idle_enter will fix the crash,
> > > >   but the system doesn't enter the deepest sleep state.
> > >
> > > I think you mean that removing it doesn't make a difference except
> > > that it prevents the deepest state from being entered.
> > >
> > > Pausing cpuidle here is kind of redundant, because it just flips the
> > > "initialized" flag that's going to be flipped again by
> > > cpuidle_resume() in the next iteration.  [BTW, your patch is missing
> > > one an additional cpuidle_resume() in the resume path to match this
> > > cpuidle_pause().]  Also calling  wake_up_all_idle_cpus() is not likely
> > > to make a difference, so I'm guessing that synchronize_rcu() does the
> > > trick.
> > >
> > > > * removing the cpuidle_pause call from dpm_suspend_noirq the
> > firmware continues to
> > > >    crash.
> > >
> > > So the crash occurs while running dpm_suspend_noirq().
> > >
> > > > I also confirmed that reverting both of these commits together on top of
> > 5.17-rc4 fixes it:
> > > >
> > > > 8d89835b0467 ("PM: suspend: Do not pause cpuidle in the suspend-to-
> > idle path")
> > > > 23f62d7ab25b ("PM: sleep: Pause cpuidle later and resume it earlier
> > during system transitions")
> > > >
> > > > The commit messages at least make it sound like it was just a rework for
> > unification of the codepaths,
> > > > not supposed to be for anything to be actually fixed, so I would think it
> > should be safe to revert.
> > >
> > > There is a deeper issue related to these commits and I'm not inclined
> > > to go back to the old code if there is only one system affected by
> > > this and the problem is related to the platform firmware on that
> > > system.
> > >
> > > > So please advise which way you want to go and I'll send up a patch (or if
> > you want to write one
> > > > I'm happy to take it and test it since I can readily reproduce it).
> > >
> > > To start with, please test the attached debug patch on top of -rc4.
> >
> > Attached is another patch to try, testing the hypothesis that the
> > observed crash is related to CPUs being in idle state that are too
> > deep for some reason during late suspend and early resume.
>
> I tried 3 test kernels:
> * 5.17-rc4 + Your second debugging patch
> * 5.17-rc4+ Your first debugging patch
> * 5.17-rc4 + A hack I wrote that pushed amd-pmc into "later" in the suspend
> using a global symbol called after LPS0 instead of letting it run in noirq stage
>
> It works properly on all of those, tried about 5x time in each.
>
> Then I confirmed I could still crash it on 5.17-rc4 with my control kernel.

I would do something like the attached patch, then (provided that it works).
---
 drivers/platform/x86/amd-pmc.c |   36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

Index: linux-pm/drivers/platform/x86/amd-pmc.c
===================================================================

--- linux-pm.orig/drivers/platform/x86/amd-pmc.c
+++ linux-pm/drivers/platform/x86/amd-pmc.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
+#include <linux/pm_qos.h>
 #include <linux/rtc.h>
 #include <linux/suspend.h>
 #include <linux/seq_file.h>
@@ -144,6 +145,7 @@ static struct amd_pmc_dev pmc;
 static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, u32 arg, u32 *data, u8 msg, bool ret);
 static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data);
 static int amd_pmc_read_stb(struct amd_pmc_dev *dev, u32 *buf);
+static struct pm_qos_request amd_pmc_pm_qos_req;
 
 static inline u32 amd_pmc_reg_read(struct amd_pmc_dev *dev, int reg_offset)
 {
@@ -531,6 +533,14 @@ static int __maybe_unused amd_pmc_suspen
 	u8 msg;
 	u32 arg = 1;
 
+	/*
+	 * Prevent CPUs from getting into idle states while running the code
+	 * below which is generally safe to run when none of the CPUs are in
+	 * idle states.
+	 */
+	cpu_latency_qos_update_request(&amd_pmc_pm_qos_req, 0);
+	wake_up_all_idle_cpus();
+
 	/* Reset and Start SMU logging - to monitor the s0i3 stats */
 	amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_RESET, 0);
 	amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_START, 0);
@@ -539,7 +549,7 @@ static int __maybe_unused amd_pmc_suspen
 	if (pdev->cpu_id == AMD_CPU_ID_CZN) {
 		rc = amd_pmc_verify_czn_rtc(pdev, &arg);
 		if (rc < 0)
-			return rc;
+			goto out;
 	}
 
 	/* Dump the IdleMask before we send hint to SMU */
@@ -551,10 +561,11 @@ static int __maybe_unused amd_pmc_suspen
 
 	if (enable_stb)
 		rc = amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF);
-	if (rc)	{
+	if (rc)
 		dev_err(pdev->dev, "error writing to STB\n");
-		return rc;
-	}
+
+out:
+	cpu_latency_qos_update_request(&amd_pmc_pm_qos_req, PM_QOS_DEFAULT_VALUE);
 
 	return rc;
 }
@@ -565,6 +576,14 @@ static int __maybe_unused amd_pmc_resume
 	int rc;
 	u8 msg;
 
+	/*
+	 * Prevent CPUs from getting into idle states while running the code
+	 * below which is generally safe to run when none of the CPUs are in
+	 * idle states.
+	 */
+	cpu_latency_qos_update_request(&amd_pmc_pm_qos_req, 0);
+	wake_up_all_idle_cpus();
+
 	msg = amd_pmc_get_os_hint(pdev);
 	rc = amd_pmc_send_cmd(pdev, 0, NULL, msg, 0);
 	if (rc)
@@ -579,12 +598,12 @@ static int __maybe_unused amd_pmc_resume
 	/* Write data incremented by 1 to distinguish in stb_read */
 	if (enable_stb)
 		rc = amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF + 1);
-	if (rc)	{
+	if (rc)
 		dev_err(pdev->dev, "error writing to STB\n");
-		return rc;
-	}
 
-	return 0;
+	cpu_latency_qos_update_request(&amd_pmc_pm_qos_req, PM_QOS_DEFAULT_VALUE);
+
+	return rc;
 }
 
 static const struct dev_pm_ops amd_pmc_pm_ops = {
@@ -722,6 +741,7 @@ static int amd_pmc_probe(struct platform
 	amd_pmc_get_smu_version(dev);
 	platform_set_drvdata(pdev, dev);
 	amd_pmc_dbgfs_register(dev);
+	cpu_latency_qos_add_request(&amd_pmc_pm_qos_req, PM_QOS_DEFAULT_VALUE);
 	return 0;
 
 err_pci_dev_put: