On Wed, Dec 27, 2023 at 7:28 AM Ricardo Neri <ricardo.neri-calderon@xxxxxxxxxxxxxxx> wrote: > > The kernel gives the HFI hardware a memory region that the latter uses to > provide updates to the HFI table. The kernel allocates this memory region > at boot. It remains constant throughout runtime time. > > When resuming from suspend or hibernation, the restore kernel allocates a > second memory buffer and reprograms the HFI hardware with the new location > as part of a normal boot. The location of the second memory buffer may > differ from the one allocated by the image kernel. Subsequently, when the > restore kernel transfers control to the image kernel, the second buffer > becomes invalid, potentially leading to memory corruption if the hardware > writes to it (hardware continues using the buffer from the restore kernel). > > Add a suspend notifier to disable all HFI instances before jumping to the > image kernel and enable them once the image kernel has been restored. Use > the memory buffer that the image kernel allocated. > > For non-boot CPUs, rely on the CPU hotplug callbacks as CPUs are disabled > and enabled during suspend and resume, respectively. > > The CPU hotplug callbacks do not cover the boot CPU. Handle the HFI > instance of the boot CPU from the suspend notifier callback. > > Cc: Chen Yu <yu.c.chen@xxxxxxxxx> > Cc: Len Brown <len.brown@xxxxxxxxx> > Cc: Srinivas Pandruvada <srinivas.pandruvada@xxxxxxxxxxxxxxx> > Cc: Stanislaw Gruszka <stanislaw.gruszka@xxxxxxxxxxxxxxx> > Cc: Zhang Rui <rui.zhang@xxxxxxxxx> > Cc: Zhao Liu <zhao1.liu@xxxxxxxxxxxxxxx> > Cc: linux-pm@xxxxxxxxxxxxxxx > Cc: stable@xxxxxxxxxxxxxxx > Signed-off-by: Ricardo Neri <ricardo.neri-calderon@xxxxxxxxxxxxxxx> > --- > drivers/thermal/intel/intel_hfi.c | 53 +++++++++++++++++++++++++++++++ > 1 file changed, 53 insertions(+) > > diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c > index d2c874f43786..965c245e5e78 100644 > --- a/drivers/thermal/intel/intel_hfi.c > +++ b/drivers/thermal/intel/intel_hfi.c > @@ -30,11 +30,13 @@ > #include <linux/kernel.h> > #include <linux/math.h> > #include <linux/mutex.h> > +#include <linux/notifier.h> > #include <linux/percpu-defs.h> > #include <linux/printk.h> > #include <linux/processor.h> > #include <linux/slab.h> > #include <linux/spinlock.h> > +#include <linux/suspend.h> > #include <linux/string.h> > #include <linux/topology.h> > #include <linux/workqueue.h> > @@ -569,11 +571,62 @@ static __init int hfi_parse_features(void) > return 0; > } > > +static void hfi_do_pm_enable(void *info) > +{ > + struct hfi_instance *hfi_instance = info; > + > + hfi_set_hw_table(hfi_instance); > + hfi_enable(); The above do RMW, so should locking be used here? > +} > + > +static void hfi_do_pm_disable(void *info) > +{ > + hfi_disable(); > +} And here? > + > +static int hfi_pm_notify(struct notifier_block *nb, > + unsigned long mode, void *unused) > +{ > + struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0); > + struct hfi_instance *hfi_instance = info->hfi_instance; > + > + /* HFI may not be in use. */ > + if (!hfi_instance) > + return 0; > + > + /* > + * Only handle the HFI instance of the package of the boot CPU. The > + * instances of other packages are handled in the CPU hotplug callbacks. > + */ > + switch (mode) { > + case PM_HIBERNATION_PREPARE: > + case PM_SUSPEND_PREPARE: > + case PM_RESTORE_PREPARE: > + return smp_call_function_single(0, hfi_do_pm_disable, > + NULL, true); > + > + case PM_POST_RESTORE: > + case PM_POST_HIBERNATION: > + case PM_POST_SUSPEND: > + return smp_call_function_single(0, hfi_do_pm_enable, > + hfi_instance, true); > + default: > + return -EINVAL; > + } > +} > + > +static struct notifier_block hfi_pm_nb = { > + .notifier_call = hfi_pm_notify, > +}; > + > void __init intel_hfi_init(void) > { > struct hfi_instance *hfi_instance; > int i, j; > > + if (register_pm_notifier(&hfi_pm_nb)) > + return; > + > if (hfi_parse_features()) > return; > > --