On 2/22/2022 9:59 PM, Mario Limonciello wrote:
commit 59348401ebed ("platform/x86: amd-pmc: Add special handling for
timer based S0i3 wakeup") adds support for using another platform timer
in lieu of the RTC which doesn't work properly on some systems. This path
was validated and worked well before submission. During the 5.16-rc1 merge
window other patches were merged that caused this to stop working properly.
When this feature was used with 5.16-rc1 or later some OEM laptops with the
matching firmware requirements from that commit would shutdown instead of
program a timer based wakeup.
This was bisected to commit 8d89835b0467 ("PM: suspend: Do not pause
cpuidle in the suspend-to-idle path"). This wasn't supposed to cause any
negative impacts and also tested well on both Intel and ARM platforms.
However this changed the semantics of when CPUs are allowed to be in the
deepest state. For the AMD systems in question it appears this causes a
firmware crash for timer based wakeup.
It's hypothesized to be caused by the `amd-pmc` driver sending `OS_HINT`
and all the CPUs going into a deep state while the timer is still being
programmed. It's likely a firmware bug, but to avoid it don't allow setting
CPUs into the deepest state while using CZN timer wakeup path.
If later it's discovered that this also occurs from "regular" suspends
without a timer as well or on other silicon, this may be later expanded to
run in the suspend path for more scenarios.
Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
Link: https://lore.kernel.org/linux-acpi/BL1PR12MB51570F5BD05980A0DCA1F3F4E23A9@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx/T/#mee35f39c41a04b624700ab2621c795367f19c90e
Fixes: 8d89835b0467 ("PM: suspend: Do not pause cpuidle in the suspend-to-idle path")
Fixes: 23f62d7ab25b ("PM: sleep: Pause cpuidle later and resume it earlier during system transitions")
Fixes: 59348401ebed ("platform/x86: amd-pmc: Add special handling for timer based S0i3 wakeup"
Signed-off-by: Mario Limonciello <mario.limonciello@xxxxxxx>
---
Changes from v1->v2:
* Update for Rafael's suggested changes:
- fix resume comment
- undo QoS at end of resume instead
- Use smaller number for QoS to allow C1 entry
drivers/platform/x86/amd-pmc.c | 39 ++++++++++++++++++++++++++--------
1 file changed, 30 insertions(+), 9 deletions(-)
diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index 4c72ba68b315..248dc6735f5a 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -21,6 +21,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
+#include <linux/pm_qos.h>
#include <linux/rtc.h>
#include <linux/suspend.h>
#include <linux/seq_file.h>
@@ -131,6 +132,7 @@ struct amd_pmc_dev {
struct device *dev;
struct pci_dev *rdev;
struct mutex lock; /* generic mutex lock */
+ struct pm_qos_request amd_pmc_pm_qos_req;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *dbgfs_dir;
#endif /* CONFIG_DEBUG_FS */
@@ -521,6 +523,14 @@ static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg)
rc = rtc_alarm_irq_enable(rtc_device, 0);
dev_dbg(pdev->dev, "wakeup timer programmed for %lld seconds\n", duration);
+ /*
+ * Prevent CPUs from getting into idle states while running the code
This should be "into deep idle states" now.
+ * below which is generally safe to run when none of the CPUs are in
+ * idle states.
+ */
+ cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req, 3);
And I would define a symbol for the 3, possibly with a comment
documenting it. Something like AMD_PMC_SUSPEND_MAX_IDLE_STATE_LATENCY
or similar.
With the above addressed, please feel free to add
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
to this patch.
And thanks for taking care of this!
+ wake_up_all_idle_cpus();
+
return rc;
}
@@ -538,24 +548,31 @@ static int __maybe_unused amd_pmc_suspend(struct device *dev)
/* Activate CZN specific RTC functionality */
if (pdev->cpu_id == AMD_CPU_ID_CZN) {
rc = amd_pmc_verify_czn_rtc(pdev, &arg);
- if (rc < 0)
- return rc;
+ if (rc)
+ goto fail;
}
/* Dump the IdleMask before we send hint to SMU */
amd_pmc_idlemask_read(pdev, dev, NULL);
msg = amd_pmc_get_os_hint(pdev);
rc = amd_pmc_send_cmd(pdev, arg, NULL, msg, 0);
- if (rc)
+ if (rc) {
dev_err(pdev->dev, "suspend failed\n");
+ goto fail;
+ }
if (enable_stb)
rc = amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF);
- if (rc) {
+ if (rc) {
dev_err(pdev->dev, "error writing to STB\n");
- return rc;
+ goto fail;
}
+ return 0;
+fail:
+ if (pdev->cpu_id == AMD_CPU_ID_CZN)
+ cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req,
+ PM_QOS_DEFAULT_VALUE);
return rc;
}
@@ -579,12 +596,15 @@ static int __maybe_unused amd_pmc_resume(struct device *dev)
/* Write data incremented by 1 to distinguish in stb_read */
if (enable_stb)
rc = amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF + 1);
- if (rc) {
+ if (rc)
dev_err(pdev->dev, "error writing to STB\n");
- return rc;
- }
- return 0;
+ /* Restore the QoS request back to defaults if it was set */
+ if (pdev->cpu_id == AMD_CPU_ID_CZN)
+ cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req,
+ PM_QOS_DEFAULT_VALUE);
+
+ return rc;
}
static const struct dev_pm_ops amd_pmc_pm_ops = {
@@ -722,6 +742,7 @@ static int amd_pmc_probe(struct platform_device *pdev)
amd_pmc_get_smu_version(dev);
platform_set_drvdata(pdev, dev);
amd_pmc_dbgfs_register(dev);
+ cpu_latency_qos_add_request(&dev->amd_pmc_pm_qos_req, PM_QOS_DEFAULT_VALUE);
return 0;
err_pci_dev_put: