On Tue, Aug 29, 2023 at 2:09 PM Fabio Estevam <festevam@xxxxxxxxx> wrote: > > From: Fabio Estevam <festevam@xxxxxxx> > > Currently, the default mechanism is to trigger a shutdown after the > critical temperature is reached. > > In some embedded cases, such behavior does not suit well, as the board may > be unattended in the field and rebooting may be a better approach. > > The bootloader may also check the temperature and only allow the boot to > proceed when the temperature is below a certain threshold. > > Introduce support for allowing a reboot to be triggered after the > critical temperature is reached. > > If the "critical-action" devicetree property is not found, fall back to > the shutdown action to preserve the existing default behavior. > > Tested on a i.MX8MM board with the following devicetree changes: > > thermal-zones { > cpu-thermal { > critical-action = "reboot"; > }; > }; > > Signed-off-by: Fabio Estevam <festevam@xxxxxxx> > --- > Changes since v3: > - None. > > drivers/thermal/thermal_core.c | 8 +++++++- > drivers/thermal/thermal_of.c | 27 +++++++++++++++++++++++++++ > include/linux/thermal.h | 6 ++++++ > 3 files changed, 40 insertions(+), 1 deletion(-) > > diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c > index a59700593d32..f69e1667acb1 100644 > --- a/drivers/thermal/thermal_core.c > +++ b/drivers/thermal/thermal_core.c > @@ -320,11 +320,17 @@ void thermal_zone_device_critical(struct thermal_zone_device *tz) > * Its a must for forced_emergency_poweroff_work to be scheduled. 
> */ > int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS; > + void (*hw_protection_action)(const char *reason, int ms_until_forced); > > dev_emerg(&tz->device, "%s: critical temperature reached, " > "shutting down\n", tz->type); > > - hw_protection_shutdown("Temperature too high", poweroff_delay_ms); > + hw_protection_action = hw_protection_shutdown; > + > + if (tz->action == THERMAL_CRITICAL_ACTION_REBOOT) > + hw_protection_action = hw_protection_reboot; > + > + hw_protection_action("Temperature too high", poweroff_delay_ms); Why not define static const char *msg = "Temperature too high"; and then if (tz->action == THERMAL_CRITICAL_ACTION_REBOOT) hw_protection_reboot(msg, poweroff_delay_ms); else hw_protection_shutdown(msg, poweroff_delay_ms); > } > EXPORT_SYMBOL(thermal_zone_device_critical); > > diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c > index 4ca905723429..8bc28cba7406 100644 > --- a/drivers/thermal/thermal_of.c > +++ b/drivers/thermal/thermal_of.c > @@ -218,6 +218,31 @@ static struct device_node *of_thermal_zone_find(struct device_node *sensor, int > return tz; > } > > +static const char * const critical_actions[] = { > + [THERMAL_CRITICAL_ACTION_SHUTDOWN] = "shutdown", > + [THERMAL_CRITICAL_ACTION_REBOOT] = "reboot", > +}; > + > +static void thermal_of_get_critical_action(struct device_node *np, > + enum thermal_action *action) > +{ > + const char *action_string; > + int i, ret; > + > + ret = of_property_read_string(np, "critical-action", &action_string); > + if (ret < 0) > + goto out_default_action; > + > + for (i = 0; i < ARRAY_SIZE(critical_actions); i++) > + if (!strcasecmp(action_string, critical_actions[i])) { > + *action = i; > + return; > + } > + > +out_default_action: > + *action = THERMAL_CRITICAL_ACTION_SHUTDOWN; > +} > + > static int thermal_of_monitor_init(struct device_node *np, int *delay, int *pdelay) > { > int ret; > @@ -516,6 +541,8 @@ static struct thermal_zone_device 
*thermal_of_zone_register(struct device_node * > goto out_kfree_trips; > } > > + thermal_of_get_critical_action(np, &tz->action); > + > ret = thermal_zone_device_enable(tz); > if (ret) { > pr_err("Failed to enabled thermal zone '%s', id=%d: %d\n", > diff --git a/include/linux/thermal.h b/include/linux/thermal.h > index b449a46766f5..08854f640db9 100644 > --- a/include/linux/thermal.h > +++ b/include/linux/thermal.h > @@ -34,6 +34,11 @@ struct thermal_cooling_device; > struct thermal_instance; > struct thermal_attr; > > +enum thermal_action { > + THERMAL_CRITICAL_ACTION_SHUTDOWN, /* shutdown when crit temperature is reached */ THERMAL_CRITICAL_ACTION_SHUTDOWN = 0, so it is clear what will happen on non-DT platforms. > + THERMAL_CRITICAL_ACTION_REBOOT, /* reboot when crit temperature is reached */ > +}; > + > enum thermal_trend { > THERMAL_TREND_STABLE, /* temperature is stable */ > THERMAL_TREND_RAISING, /* temperature is raising */ > @@ -187,6 +192,7 @@ struct thermal_zone_device { > struct list_head node; > struct delayed_work poll_queue; > enum thermal_notify_event notify_event; > + enum thermal_action action; > }; > > /** > --