On Mon, Jan 16, 2023 at 04:59:04PM +0000, Mark Rutland wrote:
I'm sorry to have to bear some bad news on that front. :(
Moo, something had to give..
IIUC what's happenign here is the PSCI cpuidle driver has entered idle and RCU is no longer watching when arm64's cpu_suspend() manipulates DAIF. Our local_daif_*() helpers poke lockdep and tracing, hence the call to trace_hardirqs_off() and the RCU usage.
Right, strictly speaking not needed at this point, IRQs should have been traced off a long time ago.
I think we need RCU to be watching all the way down to cpu_suspend(), and it's cpu_suspend() that should actually enter/exit idle context. That and we need to make cpu_suspend() and the low-level PSCI invocation noinstr. I'm not sure whether 32-bit will have a similar issue or not.
I'm not seeing 32bit or Risc-V have similar issues here, but who knows, maybe I missed somsething. In any case, the below ought to cure the ARM64 case and remove that last known RCU_NONIDLE() user as a bonus. --- diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 41974a1a229a..42e19fff40ee 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -67,10 +67,10 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) u32 state = lpi->address; if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags)) - return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM(psci_cpu_suspend_enter, + return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM_RCU(psci_cpu_suspend_enter, lpi->index, state); else - return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, + return CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(psci_cpu_suspend_enter, lpi->index, state); } #endif diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index e7163f31f716..0fbdf5fe64d8 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -4,6 +4,7 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/pgtable.h> +#include <linux/cpuidle.h> #include <asm/alternative.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> @@ -104,6 +105,10 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) * From this point debug exceptions are disabled to prevent * updates to mdscr register (saved and restored along with * general purpose registers) from kernel debuggers. + * + * Strictly speaking the trace_hardirqs_off() here is superfluous, + * hardirqs should be firmly off by now. This really ought to use + * something like raw_local_daif_save(). */ flags = local_daif_save(); @@ -120,6 +125,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) */ arm_cpuidle_save_irq_context(&context); + ct_cpuidle_enter(); + if (__cpu_suspend_enter(&state)) { /* Call the suspend finisher */ ret = fn(arg); @@ -133,8 +140,11 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) */ if (!ret) ret = -EOPNOTSUPP; + + ct_cpuidle_exit(); } else { - RCU_NONIDLE(__cpu_suspend_exit()); + ct_cpuidle_exit(); + __cpu_suspend_exit(); } arm_cpuidle_restore_irq_context(&context); diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c index 4fc4e0381944..312a34ef28dc 100644 --- a/drivers/cpuidle/cpuidle-psci.c +++ b/drivers/cpuidle/cpuidle-psci.c @@ -69,16 +69,12 @@ static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev, else pm_runtime_put_sync_suspend(pd_dev); - ct_cpuidle_enter(); - state = psci_get_domain_state(); if (!state) state = states[idx]; ret = psci_cpu_suspend_enter(state) ? -1 : idx; - ct_cpuidle_exit(); - if (s2idle) dev_pm_genpd_resume(pd_dev); else @@ -192,7 +188,7 @@ static __cpuidle int psci_enter_idle_state(struct cpuidle_device *dev, { u32 *state = __this_cpu_read(psci_cpuidle_data.psci_states); - return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, idx, state[idx]); + return CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(psci_cpu_suspend_enter, idx, state[idx]); } static const struct of_device_id psci_idle_state_match[] = { diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index e7bcfca4159f..f3a044fa4652 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -462,11 +462,22 @@ int psci_cpu_suspend_enter(u32 state) if (!psci_power_state_loses_context(state)) { struct arm_cpuidle_irq_context context; + ct_cpuidle_enter(); arm_cpuidle_save_irq_context(&context); ret = psci_ops.cpu_suspend(state, 0); arm_cpuidle_restore_irq_context(&context); + ct_cpuidle_exit(); } else { + /* + * ARM64 cpu_suspend() wants to do ct_cpuidle_*() itself. + */ + if (!IS_ENABLED(CONFIG_ARM64)) + ct_cpuidle_enter(); + ret = cpu_suspend(state, psci_suspend_finisher); + + if (!IS_ENABLED(CONFIG_ARM64)) + ct_cpuidle_exit(); } return ret; diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 630c879143c7..3183aeb7f5b4 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -307,7 +307,7 @@ extern s64 cpuidle_governor_latency_req(unsigned int cpu); #define __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, \ idx, \ state, \ - is_retention) \ + is_retention, is_rcu) \ ({ \ int __ret = 0; \ \ @@ -319,9 +319,11 @@ extern s64 cpuidle_governor_latency_req(unsigned int cpu); if (!is_retention) \ __ret = cpu_pm_enter(); \ if (!__ret) { \ - ct_cpuidle_enter(); \ + if (!is_rcu) \ + ct_cpuidle_enter(); \ __ret = low_level_idle_enter(state); \ - ct_cpuidle_exit(); \ + if (!is_rcu) \ + ct_cpuidle_exit(); \ if (!is_retention) \ cpu_pm_exit(); \ } \ @@ -330,15 +332,21 @@ extern s64 cpuidle_governor_latency_req(unsigned int cpu); }) #define CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx) \ - __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, idx, 0) + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, idx, 0, 0) #define CPU_PM_CPU_IDLE_ENTER_RETENTION(low_level_idle_enter, idx) \ - __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, idx, 1) + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, idx, 1, 0) #define CPU_PM_CPU_IDLE_ENTER_PARAM(low_level_idle_enter, idx, state) \ - __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 0) + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 0, 0) + +#define CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(low_level_idle_enter, idx, state) \ + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 0, 1) #define CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM(low_level_idle_enter, idx, state) \ - __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 1) + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 1, 0) + +#define CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM_RCU(low_level_idle_enter, idx, state) \ + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 1, 1) #endif /* _LINUX_CPUIDLE_H */