Re: [PATCH v5 3/3] thermal: thermal_core: Allow rebooting after critical temp

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Aug 29, 2023 at 9:42 PM Fabio Estevam <festevam@xxxxxxxxx> wrote:
>
> From: Fabio Estevam <festevam@xxxxxxx>
>
> Currently, the default mechanism is to trigger a shutdown after the
> critical temperature is reached.
>
> In some embedded cases, such behavior does not suit well, as the board may
> be unattended in the field and rebooting may be a better approach.
>
> The bootloader may also check the temperature and only allow the boot to
> proceed when the temperature is below a certain threshold.
>
> Introduce support for allowing a reboot to be triggered after the
> critical temperature is reached.
>
> If the "critical-action" devicetree property is not found, fall back to
> the shutdown action to preserve the existing default behavior.
>
> Tested on an i.MX8MM board with the following devicetree changes:
>
>         thermal-zones {
>                 cpu-thermal {
>                         critical-action = "reboot";
>                 };
>         };
>
> Signed-off-by: Fabio Estevam <festevam@xxxxxxx>
> ---
> Changes since v4:
> - Simplify the logic inside thermal_zone_device_critical(). (Rafael)
> - Declare THERMAL_CRITICAL_ACTION_SHUTDOWN = 0 so it is clear what happens
> on non-DT platforms. (Rafael)
>
>  drivers/thermal/thermal_core.c |  6 +++++-
>  drivers/thermal/thermal_of.c   | 27 +++++++++++++++++++++++++++
>  include/linux/thermal.h        |  6 ++++++
>  3 files changed, 38 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index a59700593d32..062114608667 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -320,11 +320,15 @@ void thermal_zone_device_critical(struct thermal_zone_device *tz)
>          * Its a must for forced_emergency_poweroff_work to be scheduled.
>          */
>         int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS;
> +       static const char *msg = "Temperature too high";
>
>         dev_emerg(&tz->device, "%s: critical temperature reached, "
>                   "shutting down\n", tz->type);
>
> -       hw_protection_shutdown("Temperature too high", poweroff_delay_ms);
> +       if (tz->action == THERMAL_CRITICAL_ACTION_REBOOT)
> +               hw_protection_reboot(msg, poweroff_delay_ms);
> +       else
> +               hw_protection_shutdown(msg, poweroff_delay_ms);
>  }
>  EXPORT_SYMBOL(thermal_zone_device_critical);
>
> diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
> index 4ca905723429..8bc28cba7406 100644
> --- a/drivers/thermal/thermal_of.c
> +++ b/drivers/thermal/thermal_of.c
> @@ -218,6 +218,31 @@ static struct device_node *of_thermal_zone_find(struct device_node *sensor, int
>         return tz;
>  }
>
> +static const char * const critical_actions[] = {
> +       [THERMAL_CRITICAL_ACTION_SHUTDOWN]      = "shutdown",
> +       [THERMAL_CRITICAL_ACTION_REBOOT]        = "reboot",
> +};
> +
> +static void thermal_of_get_critical_action(struct device_node *np,
> +                                          enum thermal_action *action)
> +{
> +       const char *action_string;
> +       int i, ret;
> +
> +       ret = of_property_read_string(np, "critical-action", &action_string);
> +       if (ret < 0)
> +               goto out_default_action;
> +
> +       for (i = 0; i < ARRAY_SIZE(critical_actions); i++)
> +               if (!strcasecmp(action_string, critical_actions[i])) {
> +                       *action = i;
> +                       return;
> +               }

This looks somewhat artificial and is a bit questionable (the index
variable should arguably start at THERMAL_CRITICAL_ACTION_SHUTDOWN,
for instance, and the "shutdown" item is redundant).

There are only two values and you want to carry out an emergency
shutdown anyway if the value is not "reboot".

I would just do

    if (!strcasecmp(action_string, "reboot")) {
            *action = THERMAL_CRITICAL_ACTION_REBOOT;
            return;
    }

> +
> +out_default_action:
> +       *action = THERMAL_CRITICAL_ACTION_SHUTDOWN;
> +}
> +
>  static int thermal_of_monitor_init(struct device_node *np, int *delay, int *pdelay)
>  {
>         int ret;
> @@ -516,6 +541,8 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
>                 goto out_kfree_trips;
>         }
>
> +       thermal_of_get_critical_action(np, &tz->action);
> +
>         ret = thermal_zone_device_enable(tz);
>         if (ret) {
>                 pr_err("Failed to enabled thermal zone '%s', id=%d: %d\n",
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index b449a46766f5..b68e5734823d 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -34,6 +34,11 @@ struct thermal_cooling_device;
>  struct thermal_instance;
>  struct thermal_attr;
>
> +enum thermal_action {
> +       THERMAL_CRITICAL_ACTION_SHUTDOWN = 0, /* shutdown when crit temperature is reached */
> +       THERMAL_CRITICAL_ACTION_REBOOT, /* reboot when crit temperature is reached */
> +};
> +
>  enum thermal_trend {
>         THERMAL_TREND_STABLE, /* temperature is stable */
>         THERMAL_TREND_RAISING, /* temperature is raising */
> @@ -187,6 +192,7 @@ struct thermal_zone_device {
>         struct list_head node;
>         struct delayed_work poll_queue;
>         enum thermal_notify_event notify_event;
> +       enum thermal_action action;
>  };
>
>  /**
> --



[Index of Archives]     [Device Tree Compiler]     [Device Tree Spec]     [Linux Driver Backports]     [Video for Linux]     [Linux USB Devel]     [Linux PCI Devel]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Yosemite Backpacking]


  Powered by Linux