On Fri, Nov 23, 2018 at 04:55:59PM +0000, Marc Zyngier wrote: > On 23/11/2018 16:17, Miquel Raynal wrote: > > The IP can manage to trigger interrupts on overheat situation from all > > the sensors. > > > > However, the interrupt source changes along with the last selected > > source (ie. the last read sensor), which is an inconsistent behavior. > > Avoid possible glitches by always selecting back only one channel which > > will then be referenced as the "overheat_sensor" (arbitrarily: the first > > in the DT which has a critical trip point filled in). > > > > It is possible that the scan of all thermal zone nodes did not bring a > > critical trip point from which the overheat interrupt could be > > configured. In this case just complain but do not fail the probe. > > > > Also disable sensor switch during overheat situations because changing > > the channel while the system is too hot could clear the overheat state > > by changing the source while the temperature is still very high. > > > > Even if the overheat state is not declared, overheat interrupt must be > > cleared by reading the DFX interrupt cause _after_ the temperature has > > fallen down to the low threshold, otherwise future possible interrupts > > would not be served. A work polls the corresponding register until the > > overheat flag gets cleared in this case. > > > > Suggested-by: David Sniatkiwicz <davidsn@xxxxxxxxxxx> > > Signed-off-by: Miquel Raynal <miquel.raynal@xxxxxxxxxxx> > > --- > > drivers/thermal/armada_thermal.c | 270 ++++++++++++++++++++++++++++++- > > 1 file changed, 269 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c > > index 92f67d40f2e9..55cfaee0da77 100644 > > --- a/drivers/thermal/armada_thermal.c > > +++ b/drivers/thermal/armada_thermal.c > > @@ -26,6 +26,11 @@ > > #include <linux/iopoll.h> > > #include <linux/mfd/syscon.h> > > #include <linux/regmap.h> > > +#include <linux/interrupt.h> > > + > > +#include "thermal_core.h" > > + > > +#define TO_MCELSIUS(c) ((c) * 1000) > > > > /* Thermal Manager Control and Status Register */ > > #define PMU_TDC0_SW_RST_MASK (0x1 << 1) > > @@ -61,9 +66,13 @@ > > #define CONTROL1_TSEN_AVG_MASK 0x7 > > #define CONTROL1_EXT_TSEN_SW_RESET BIT(7) > > #define CONTROL1_EXT_TSEN_HW_RESETn BIT(8) > > +#define CONTROL1_TSEN_INT_EN BIT(25) > > +#define CONTROL1_TSEN_SELECT_OFF 21 > > +#define CONTROL1_TSEN_SELECT_MASK 0x3 > > > > #define STATUS_POLL_PERIOD_US 1000 > > #define STATUS_POLL_TIMEOUT_US 100000 > > +#define OVERHEAT_INT_POLL_DELAY_MS 1000 > > > > struct armada_thermal_data; > > > > @@ -75,7 +84,11 @@ struct armada_thermal_priv { > > /* serialize temperature reads/updates */ > > struct mutex update_lock; > > struct armada_thermal_data *data; > > + struct thermal_zone_device *overheat_sensor; > > + int interrupt_source; > > int current_channel; > > + long current_threshold; > > + long current_hysteresis; > > }; > > > > struct armada_thermal_data { > > @@ -93,12 +106,20 @@ struct armada_thermal_data { > > /* Register shift and mask to access the sensor temperature */ > > unsigned int temp_shift; > > unsigned int temp_mask; > > + unsigned int thresh_shift; > > + unsigned int hyst_shift; > > + unsigned int hyst_mask; > > u32 is_valid_bit; > > > > /* Syscon access */ > > unsigned int syscon_control0_off; > > unsigned int syscon_control1_off; > > unsigned int syscon_status_off; > > + unsigned int dfx_irq_cause_off; > > + unsigned int dfx_irq_mask_off; > > + unsigned int dfx_overheat_irq; > > + unsigned int dfx_server_irq_mask_off; > > + unsigned int dfx_server_irq_en; > > > > /* One sensor is in the thermal IC, the others are in the CPUs if any */ > > unsigned int cpu_nr; > > @@ -272,6 +293,41 @@ static bool armada_is_valid(struct armada_thermal_priv *priv) > > return reg & priv->data->is_valid_bit; > > } > > > > +static void armada_enable_overheat_interrupt(struct armada_thermal_priv *priv) > > +{ > > + struct armada_thermal_data *data = priv->data; > > + u32 reg; > > + > > + /* Clear DFX temperature IRQ cause */ > > + regmap_read(priv->syscon, data->dfx_irq_cause_off, ®); > > + > > + /* Enable DFX Temperature IRQ */ > > + regmap_read(priv->syscon, data->dfx_irq_mask_off, ®); > > + reg |= data->dfx_overheat_irq; > > + regmap_write(priv->syscon, data->dfx_irq_mask_off, reg); > > + > > + /* Enable DFX server IRQ */ > > + regmap_read(priv->syscon, data->dfx_server_irq_mask_off, ®); > > + reg |= data->dfx_server_irq_en; > > + regmap_write(priv->syscon, data->dfx_server_irq_mask_off, reg); > > + > > + /* Enable overheat interrupt */ > > + regmap_read(priv->syscon, data->syscon_control1_off, ®); > > + reg |= CONTROL1_TSEN_INT_EN; > > + regmap_write(priv->syscon, data->syscon_control1_off, reg); > > +} > > + > > +static void __maybe_unused > > +armada_disable_overheat_interrupt(struct armada_thermal_priv *priv) > > +{ > > + struct armada_thermal_data *data = priv->data; > > + u32 reg; > > + > > + regmap_read(priv->syscon, data->syscon_control1_off, ®); > > + reg &= ~CONTROL1_TSEN_INT_EN; > > + regmap_write(priv->syscon, data->syscon_control1_off, reg); > > +} > > + > > /* There is currently no board with more than one sensor per channel */ > > static int armada_select_channel(struct armada_thermal_priv *priv, int channel) > > { > > @@ -388,6 +444,16 @@ static int armada_get_temp(void *_sensor, int *temp) > > > > /* Do the actual reading */ > > ret = armada_read_sensor(priv, temp); > > + if (ret) > > + goto unlock_mutex; > > + > > + /* > > + * Select back the interrupt source channel from which a potential > > + * critical trip point has been set. > > + */ > > + ret = armada_select_channel(priv, priv->interrupt_source); > > + if (ret) > > + goto unlock_mutex; > > Do you really need a "goto" to the immediately following line? > Agreed, that goto can be removed.. > > > > unlock_mutex: > > mutex_unlock(&priv->update_lock); > > @@ -399,6 +465,121 @@ static struct thermal_zone_of_device_ops of_ops = { > > .get_temp = armada_get_temp, > > }; > > > > +static unsigned int armada_mc_to_reg_temp(struct armada_thermal_data *data, > > + unsigned int temp_mc) > > +{ > > + s64 b = data->coef_b; > > + s64 m = data->coef_m; > > + s64 div = data->coef_div; > > + unsigned int sample; > > + > > + if (data->inverted) > > + sample = div_s64(((temp_mc * div) + b), m); > > + else > > + sample = div_s64((b - (temp_mc * div)), m); > > + > > + return sample & data->temp_mask; > > +} > > + > > +static unsigned int armada_mc_to_reg_hyst(struct armada_thermal_data *data, > > + unsigned int hyst_mc) > > +{ > > + /* > > + * The documentation states: > > + * high/low watermark = threshold +/- 0.4761 * 2^(hysteresis + 2) > > + * which is the mathematical derivation for: > > + * 0x0 <=> 1.9°C, 0x1 <=> 3.8°C, 0x2 <=> 7.6°C, 0x3 <=> 15.2 > > + */ > > + unsigned int hyst_levels_mc[] = {1900, 3800, 7600, 15200}; > > Why isn't this a static array rather than something you have to push on > the stack each time you execute this function? Also, where those constants come from? Is this something the driver needs to be updated when new chips are added for instance? > > > + int i; > > + > > + /* > > + * We will always take the smallest possible hysteresis to avoid risking > > + * the hardware integrity by enlarging the threshold by +8°C in the > > + * worst case. > > + */ > > + for (i = ARRAY_SIZE(hyst_levels_mc) - 1; i > 0; i--) > > + if (hyst_mc >= hyst_levels_mc[i]) > > + break; > > + > > + return i & data->hyst_mask; > > +} > > + > > +static void armada_set_overheat_thresholds(struct armada_thermal_priv *priv, > > + int thresh_mc, int hyst_mc) > > +{ > > + struct armada_thermal_data *data = priv->data; > > + unsigned int threshold = armada_mc_to_reg_temp(data, thresh_mc); > > + unsigned int hysteresis = armada_mc_to_reg_hyst(data, hyst_mc); > > + u32 ctrl1; > > + > > + regmap_read(priv->syscon, data->syscon_control1_off, &ctrl1); > > + > > + /* Set Threshold */ > > + if (thresh_mc >= 0) { > > + ctrl1 &= ~(data->temp_mask << data->thresh_shift); > > + ctrl1 |= threshold << data->thresh_shift; > > + priv->current_threshold = thresh_mc; > > + } > > + > > + /* Set Hysteresis */ > > + if (hyst_mc >= 0) { > > + ctrl1 &= ~(data->hyst_mask << data->hyst_shift); > > + ctrl1 |= hysteresis << data->hyst_shift; > > + priv->current_hysteresis = hyst_mc; > > + } > > + > > + regmap_write(priv->syscon, data->syscon_control1_off, ctrl1); > > +} > > + > > +static irqreturn_t armada_overheat_isr(int irq, void *blob) > > +{ > > + /* > > + * Disable the IRQ and continue in thread context (thermal core > > + * notification and temperature monitoring). > > + */ > > + disable_irq_nosync(irq); > > + > > + return IRQ_WAKE_THREAD; > > +} > > + > > +static irqreturn_t armada_overheat_isr_thread(int irq, void *blob) > > +{ > > + struct armada_thermal_priv *priv = (struct armada_thermal_priv *)blob; > > Unnecessary cast. > > > + int low_threshold = priv->current_threshold - priv->current_hysteresis; > > + int temperature; > > + u32 dummy; > > + int ret; > > + > > + /* Notify the core in thread context */ > > + thermal_zone_device_update(priv->overheat_sensor, > > + THERMAL_EVENT_UNSPECIFIED); > > + > > + /* > > + * The overheat interrupt must be cleared by reading the DFX interrupt > > + * cause _after_ the temperature has fallen down to the low threshold. > > + * Otherwise future interrupts might not be served. > > + */ > > + do { > > + msleep(OVERHEAT_INT_POLL_DELAY_MS); > > + mutex_lock(&priv->update_lock); > > + ret = armada_read_sensor(priv, &temperature); > > + mutex_unlock(&priv->update_lock); > > + if (ret) > > + return ret; > > How will the interrupt fire again if you exit without re-enabling it? Is > "ret" guaranteed to be a valid irqreturn_t? > > > + } while (temperature >= low_threshold); > > + > > + regmap_read(priv->syscon, priv->data->dfx_irq_cause_off, &dummy); > > + > > + /* Notify the thermal core that the temperature is acceptable again */ > > + thermal_zone_device_update(priv->overheat_sensor, > > + THERMAL_EVENT_UNSPECIFIED); > > + > > + enable_irq(irq); > > + > > + return IRQ_HANDLED; > > +} > > + > > static const struct armada_thermal_data armadaxp_data = { > > .init = armadaxp_init, > > .temp_shift = 10, > > @@ -454,6 +635,9 @@ static const struct armada_thermal_data armada_ap806_data = { > > .is_valid_bit = BIT(16), > > .temp_shift = 0, > > .temp_mask = 0x3ff, > > + .thresh_shift = 3, > > + .hyst_shift = 19, > > + .hyst_mask = 0x3, > > .coef_b = -150000LL, > > .coef_m = 423ULL, > > .coef_div = 1, > > @@ -462,6 +646,11 @@ static const struct armada_thermal_data armada_ap806_data = { > > .syscon_control0_off = 0x84, > > .syscon_control1_off = 0x88, > > .syscon_status_off = 0x8C, > > + .dfx_irq_cause_off = 0x108, > > + .dfx_irq_mask_off = 0x10C, > > + .dfx_overheat_irq = BIT(22), > > + .dfx_server_irq_mask_off = 0x104, > > + .dfx_server_irq_en = BIT(1), > > .cpu_nr = 4, > > }; > > > > @@ -470,6 +659,9 @@ static const struct armada_thermal_data armada_cp110_data = { > > .is_valid_bit = BIT(10), > > .temp_shift = 0, > > .temp_mask = 0x3ff, > > + .thresh_shift = 16, > > + .hyst_shift = 26, > > + .hyst_mask = 0x3, > > .coef_b = 1172499100ULL, > > .coef_m = 2000096ULL, > > .coef_div = 4201, > > @@ -477,6 +669,11 @@ static const struct armada_thermal_data armada_cp110_data = { > > .syscon_control0_off = 0x70, > > .syscon_control1_off = 0x74, > > .syscon_status_off = 0x78, > > + .dfx_irq_cause_off = 0x108, > > + .dfx_irq_mask_off = 0x10C, > > + .dfx_overheat_irq = BIT(20), > > + .dfx_server_irq_mask_off = 0x104, > > + .dfx_server_irq_en = BIT(1), > > }; > > > > static const struct of_device_id armada_thermal_id_table[] = { > > @@ -592,6 +789,48 @@ static void armada_set_sane_name(struct platform_device *pdev, > > } while (insane_char); > > } > > > > +/* > > + * The IP can manage to trigger interrupts on overheat situation from all the > > + * sensors. However, the interrupt source changes along with the last selected > > + * source (ie. the last read sensor), which is an inconsistent behavior. Avoid > > + * possible glitches by always selecting back only one channel (arbitrarily: the > > + * first in the DT which has a critical trip point). We also disable sensor > > The DT doesn't seem to *mandate* this. Should it? Does it also mean that > you can only have a single critical point that will generate an > interrupt on overheat? > Also, keep in mind that thermal core will shutdown the system when it sees any zone with temp crossing the first critical trip. > > + * switch during overheat situations. > > + */ > > +static int armada_configure_overheat_int(struct armada_thermal_priv *priv, > > + struct thermal_zone_device *tz, > > + int sensor_id) > > +{ > > + /* Retrieve the critical trip point to enable the overheat interrupt */ > > + const struct thermal_trip *trips = of_thermal_get_trip_points(tz); > > + int ret; > > + int i; > > + > > + if (!trips) > > + return -EINVAL; > > + > > + for (i = 0; i < of_thermal_get_ntrips(tz); i++) > > + if (trips[i].type == THERMAL_TRIP_CRITICAL) > > + break; > > + > > + if (i == of_thermal_get_ntrips(tz)) > > + return -EINVAL; > > + > > + ret = armada_select_channel(priv, sensor_id); > > + if (ret) > > + return ret; > > + > > + armada_set_overheat_thresholds(priv, > > + trips[i].temperature, > > + trips[i].hysteresis); > > + priv->overheat_sensor = tz; > > + priv->interrupt_source = sensor_id; > > + > > + armada_enable_overheat_interrupt(priv); > > + > > + return 0; > > +} > > + > > static int armada_thermal_probe(struct platform_device *pdev) > > { > > struct thermal_zone_device *tz; > > @@ -599,7 +838,7 @@ static int armada_thermal_probe(struct platform_device *pdev) > > struct armada_drvdata *drvdata; > > const struct of_device_id *match; > > struct armada_thermal_priv *priv; > > - int sensor_id; > > + int sensor_id, irq; > > int ret; > > > > match = of_match_device(armada_thermal_id_table, &pdev->dev); > > @@ -669,6 +908,23 @@ static int armada_thermal_probe(struct platform_device *pdev) > > drvdata->data.priv = priv; > > platform_set_drvdata(pdev, drvdata); > > > > + irq = platform_get_irq(pdev, 0); > > + if (irq == -EPROBE_DEFER) > > + return irq; > > + > > + /* The overheat interrupt feature is not mandatory */ > > + if (irq >= 0) { > > 0 is not a valid interrupt. > > > + ret = devm_request_threaded_irq(&pdev->dev, irq, > > + armada_overheat_isr, > > + armada_overheat_isr_thread, > > + 0, NULL, priv); > > + if (ret) { > > + dev_err(&pdev->dev, "Cannot request threaded IRQ %d\n", > > + irq); > > + return ret; > > + } > > + } > > + > > /* > > * There is one channel for the IC and one per CPU (if any), each > > * channel has one sensor. > > @@ -692,8 +948,20 @@ static int armada_thermal_probe(struct platform_device *pdev) > > devm_kfree(&pdev->dev, sensor); > > continue; > > } > > + > > + /* > > + * The first channel that has a critical trip point registered > > + * in the DT will serve as interrupt source. Others possible > > + * critical trip points will simply be ignored by the driver. > > + */ > > + if (irq >= 0 && !priv->overheat_sensor) > > Same here. > > > + armada_configure_overheat_int(priv, tz, sensor->id); > > } > > > > + /* Just complain if no overheat interrupt was set up */ > > + if (!priv->overheat_sensor) > > + dev_warn(&pdev->dev, "Overheat interrupt not available\n"); > > + > > return 0; > > } > > > > > > Thanks, > > M. > -- > Jazz is not dead. It just smells funny...