Re: [PATCH v2 1/6] thermal: armada: add overheat interrupt support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Nov 23, 2018 at 04:55:59PM +0000, Marc Zyngier wrote:
> On 23/11/2018 16:17, Miquel Raynal wrote:
> > The IP can manage to trigger interrupts on overheat situation from all
> > the sensors.
> > 
> > However, the interrupt source changes along with the last selected
> > source (ie. the last read sensor), which is an inconsistent behavior.
> > Avoid possible glitches by always selecting back only one channel which
> > will then be referenced as the "overheat_sensor" (arbitrarily: the first
> > in the DT which has a critical trip point filled in).
> > 
> > It is possible that the scan of all thermal zone nodes did not bring a
> > critical trip point from which the overheat interrupt could be
> > configured. In this case just complain but do not fail the probe.
> > 
> > Also disable sensor switch during overheat situations because changing
> > the channel while the system is too hot could clear the overheat state
> > by changing the source while the temperature is still very high.
> > 
> > Even if the overheat state is not declared, overheat interrupt must be
> > cleared by reading the DFX interrupt cause _after_ the temperature has
> > fallen down to the low threshold, otherwise future possible interrupts
> > would not be served. A work polls the corresponding register until the
> > overheat flag gets cleared in this case.
> > 
> > Suggested-by: David Sniatkiwicz <davidsn@xxxxxxxxxxx>
> > Signed-off-by: Miquel Raynal <miquel.raynal@xxxxxxxxxxx>
> > ---
> >  drivers/thermal/armada_thermal.c | 270 ++++++++++++++++++++++++++++++-
> >  1 file changed, 269 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c
> > index 92f67d40f2e9..55cfaee0da77 100644
> > --- a/drivers/thermal/armada_thermal.c
> > +++ b/drivers/thermal/armada_thermal.c
> > @@ -26,6 +26,11 @@
> >  #include <linux/iopoll.h>
> >  #include <linux/mfd/syscon.h>
> >  #include <linux/regmap.h>
> > +#include <linux/interrupt.h>
> > +
> > +#include "thermal_core.h"
> > +
> > +#define TO_MCELSIUS(c)			((c) * 1000)
> >  
> >  /* Thermal Manager Control and Status Register */
> >  #define PMU_TDC0_SW_RST_MASK		(0x1 << 1)
> > @@ -61,9 +66,13 @@
> >  #define CONTROL1_TSEN_AVG_MASK		0x7
> >  #define CONTROL1_EXT_TSEN_SW_RESET	BIT(7)
> >  #define CONTROL1_EXT_TSEN_HW_RESETn	BIT(8)
> > +#define CONTROL1_TSEN_INT_EN		BIT(25)
> > +#define CONTROL1_TSEN_SELECT_OFF	21
> > +#define CONTROL1_TSEN_SELECT_MASK	0x3
> >  
> >  #define STATUS_POLL_PERIOD_US		1000
> >  #define STATUS_POLL_TIMEOUT_US		100000
> > +#define OVERHEAT_INT_POLL_DELAY_MS	1000
> >  
> >  struct armada_thermal_data;
> >  
> > @@ -75,7 +84,11 @@ struct armada_thermal_priv {
> >  	/* serialize temperature reads/updates */
> >  	struct mutex update_lock;
> >  	struct armada_thermal_data *data;
> > +	struct thermal_zone_device *overheat_sensor;
> > +	int interrupt_source;
> >  	int current_channel;
> > +	long current_threshold;
> > +	long current_hysteresis;
> >  };
> >  
> >  struct armada_thermal_data {
> > @@ -93,12 +106,20 @@ struct armada_thermal_data {
> >  	/* Register shift and mask to access the sensor temperature */
> >  	unsigned int temp_shift;
> >  	unsigned int temp_mask;
> > +	unsigned int thresh_shift;
> > +	unsigned int hyst_shift;
> > +	unsigned int hyst_mask;
> >  	u32 is_valid_bit;
> >  
> >  	/* Syscon access */
> >  	unsigned int syscon_control0_off;
> >  	unsigned int syscon_control1_off;
> >  	unsigned int syscon_status_off;
> > +	unsigned int dfx_irq_cause_off;
> > +	unsigned int dfx_irq_mask_off;
> > +	unsigned int dfx_overheat_irq;
> > +	unsigned int dfx_server_irq_mask_off;
> > +	unsigned int dfx_server_irq_en;
> >  
> >  	/* One sensor is in the thermal IC, the others are in the CPUs if any */
> >  	unsigned int cpu_nr;
> > @@ -272,6 +293,41 @@ static bool armada_is_valid(struct armada_thermal_priv *priv)
> >  	return reg & priv->data->is_valid_bit;
> >  }
> >  
> > +static void armada_enable_overheat_interrupt(struct armada_thermal_priv *priv)
> > +{
> > +	struct armada_thermal_data *data = priv->data;
> > +	u32 reg;
> > +
> > +	/* Clear DFX temperature IRQ cause */
> > +	regmap_read(priv->syscon, data->dfx_irq_cause_off, &reg);
> > +
> > +	/* Enable DFX Temperature IRQ */
> > +	regmap_read(priv->syscon, data->dfx_irq_mask_off, &reg);
> > +	reg |= data->dfx_overheat_irq;
> > +	regmap_write(priv->syscon, data->dfx_irq_mask_off, reg);
> > +
> > +	/* Enable DFX server IRQ */
> > +	regmap_read(priv->syscon, data->dfx_server_irq_mask_off, &reg);
> > +	reg |= data->dfx_server_irq_en;
> > +	regmap_write(priv->syscon, data->dfx_server_irq_mask_off, reg);
> > +
> > +	/* Enable overheat interrupt */
> > +	regmap_read(priv->syscon, data->syscon_control1_off, &reg);
> > +	reg |= CONTROL1_TSEN_INT_EN;
> > +	regmap_write(priv->syscon, data->syscon_control1_off, reg);
> > +}
> > +
> > +static void __maybe_unused
> > +armada_disable_overheat_interrupt(struct armada_thermal_priv *priv)
> > +{
> > +	struct armada_thermal_data *data = priv->data;
> > +	u32 reg;
> > +
> > +	regmap_read(priv->syscon, data->syscon_control1_off, &reg);
> > +	reg &= ~CONTROL1_TSEN_INT_EN;
> > +	regmap_write(priv->syscon, data->syscon_control1_off, reg);
> > +}
> > +
> >  /* There is currently no board with more than one sensor per channel */
> >  static int armada_select_channel(struct armada_thermal_priv *priv, int channel)
> >  {
> > @@ -388,6 +444,16 @@ static int armada_get_temp(void *_sensor, int *temp)
> >  
> >  	/* Do the actual reading */
> >  	ret = armada_read_sensor(priv, temp);
> > +	if (ret)
> > +		goto unlock_mutex;
> > +
> > +	/*
> > +	 * Select back the interrupt source channel from which a potential
> > +	 * critical trip point has been set.
> > +	 */
> > +	ret = armada_select_channel(priv, priv->interrupt_source);
> > +	if (ret)
> > +		goto unlock_mutex;
> 
> Do you really need a "goto" to the immediately following line?
> 

Agreed, that goto can be removed..

> >  
> >  unlock_mutex:
> >  	mutex_unlock(&priv->update_lock);
> > @@ -399,6 +465,121 @@ static struct thermal_zone_of_device_ops of_ops = {
> >  	.get_temp = armada_get_temp,
> >  };
> >  
> > +static unsigned int armada_mc_to_reg_temp(struct armada_thermal_data *data,
> > +					  unsigned int temp_mc)
> > +{
> > +	s64 b = data->coef_b;
> > +	s64 m = data->coef_m;
> > +	s64 div = data->coef_div;
> > +	unsigned int sample;
> > +
> > +	if (data->inverted)
> > +		sample = div_s64(((temp_mc * div) + b), m);
> > +	else
> > +		sample = div_s64((b - (temp_mc * div)), m);
> > +
> > +	return sample & data->temp_mask;
> > +}
> > +
> > +static unsigned int armada_mc_to_reg_hyst(struct armada_thermal_data *data,
> > +					  unsigned int hyst_mc)
> > +{
> > +	/*
> > +	 * The documentation states:
> > +	 * high/low watermark = threshold +/- 0.4761 * 2^(hysteresis + 2)
> > +	 * which is the mathematical derivation for:
> > +	 * 0x0 <=> 1.9°C, 0x1 <=> 3.8°C, 0x2 <=> 7.6°C, 0x3 <=> 15.2
> > +	 */
> > +	unsigned int hyst_levels_mc[] = {1900, 3800, 7600, 15200};
> 
> Why isn't this a static array rather than something you have to push on
> the stack each time you execute this function?

Also, where those constants come from? Is this something the driver
needs to be updated when new chips are added for instance?

> 
> > +	int i;
> > +
> > +	/*
> > +	 * We will always take the smallest possible hysteresis to avoid risking
> > +	 * the hardware integrity by enlarging the threshold by +8°C in the
> > +	 * worst case.
> > +	 */
> > +	for (i = ARRAY_SIZE(hyst_levels_mc) - 1; i > 0; i--)
> > +		if (hyst_mc >= hyst_levels_mc[i])
> > +			break;
> > +
> > +	return i & data->hyst_mask;
> > +}
> > +
> > +static void armada_set_overheat_thresholds(struct armada_thermal_priv *priv,
> > +					   int thresh_mc, int hyst_mc)
> > +{
> > +	struct armada_thermal_data *data = priv->data;
> > +	unsigned int threshold = armada_mc_to_reg_temp(data, thresh_mc);
> > +	unsigned int hysteresis = armada_mc_to_reg_hyst(data, hyst_mc);
> > +	u32 ctrl1;
> > +
> > +	regmap_read(priv->syscon, data->syscon_control1_off, &ctrl1);
> > +
> > +	/* Set Threshold */
> > +	if (thresh_mc >= 0) {
> > +		ctrl1 &= ~(data->temp_mask << data->thresh_shift);
> > +		ctrl1 |= threshold << data->thresh_shift;
> > +		priv->current_threshold = thresh_mc;
> > +	}
> > +
> > +	/* Set Hysteresis */
> > +	if (hyst_mc >= 0) {
> > +		ctrl1 &= ~(data->hyst_mask << data->hyst_shift);
> > +		ctrl1 |= hysteresis << data->hyst_shift;
> > +		priv->current_hysteresis = hyst_mc;
> > +	}
> > +
> > +	regmap_write(priv->syscon, data->syscon_control1_off, ctrl1);
> > +}
> > +
> > +static irqreturn_t armada_overheat_isr(int irq, void *blob)
> > +{
> > +	/*
> > +	 * Disable the IRQ and continue in thread context (thermal core
> > +	 * notification and temperature monitoring).
> > +	 */
> > +	disable_irq_nosync(irq);
> > +
> > +	return IRQ_WAKE_THREAD;
> > +}
> > +
> > +static irqreturn_t armada_overheat_isr_thread(int irq, void *blob)
> > +{
> > +	struct armada_thermal_priv *priv = (struct armada_thermal_priv *)blob;
> 
> Unnecessary cast.
> 
> > +	int low_threshold = priv->current_threshold - priv->current_hysteresis;
> > +	int temperature;
> > +	u32 dummy;
> > +	int ret;
> > +
> > +	/* Notify the core in thread context */
> > +	thermal_zone_device_update(priv->overheat_sensor,
> > +				   THERMAL_EVENT_UNSPECIFIED);
> > +
> > +	/*
> > +	 * The overheat interrupt must be cleared by reading the DFX interrupt
> > +	 * cause _after_ the temperature has fallen down to the low threshold.
> > +	 * Otherwise future interrupts might not be served.
> > +	 */
> > +	do {
> > +		msleep(OVERHEAT_INT_POLL_DELAY_MS);
> > +		mutex_lock(&priv->update_lock);
> > +		ret = armada_read_sensor(priv, &temperature);
> > +		mutex_unlock(&priv->update_lock);
> > +		if (ret)
> > +			return ret;
> 
> How will the interrupt fire again if you exit without re-enabling it? Is
> "ret" guaranteed to be a valid irqreturn_t?
> 
> > +	} while (temperature >= low_threshold);
> > +
> > +	regmap_read(priv->syscon, priv->data->dfx_irq_cause_off, &dummy);
> > +
> > +	/* Notify the thermal core that the temperature is acceptable again */
> > +	thermal_zone_device_update(priv->overheat_sensor,
> > +				   THERMAL_EVENT_UNSPECIFIED);
> > +
> > +	enable_irq(irq);
> > +
> > +	return IRQ_HANDLED;
> > +}
> > +
> >  static const struct armada_thermal_data armadaxp_data = {
> >  	.init = armadaxp_init,
> >  	.temp_shift = 10,
> > @@ -454,6 +635,9 @@ static const struct armada_thermal_data armada_ap806_data = {
> >  	.is_valid_bit = BIT(16),
> >  	.temp_shift = 0,
> >  	.temp_mask = 0x3ff,
> > +	.thresh_shift = 3,
> > +	.hyst_shift = 19,
> > +	.hyst_mask = 0x3,
> >  	.coef_b = -150000LL,
> >  	.coef_m = 423ULL,
> >  	.coef_div = 1,
> > @@ -462,6 +646,11 @@ static const struct armada_thermal_data armada_ap806_data = {
> >  	.syscon_control0_off = 0x84,
> >  	.syscon_control1_off = 0x88,
> >  	.syscon_status_off = 0x8C,
> > +	.dfx_irq_cause_off = 0x108,
> > +	.dfx_irq_mask_off = 0x10C,
> > +	.dfx_overheat_irq = BIT(22),
> > +	.dfx_server_irq_mask_off = 0x104,
> > +	.dfx_server_irq_en = BIT(1),
> >  	.cpu_nr = 4,
> >  };
> >  
> > @@ -470,6 +659,9 @@ static const struct armada_thermal_data armada_cp110_data = {
> >  	.is_valid_bit = BIT(10),
> >  	.temp_shift = 0,
> >  	.temp_mask = 0x3ff,
> > +	.thresh_shift = 16,
> > +	.hyst_shift = 26,
> > +	.hyst_mask = 0x3,
> >  	.coef_b = 1172499100ULL,
> >  	.coef_m = 2000096ULL,
> >  	.coef_div = 4201,
> > @@ -477,6 +669,11 @@ static const struct armada_thermal_data armada_cp110_data = {
> >  	.syscon_control0_off = 0x70,
> >  	.syscon_control1_off = 0x74,
> >  	.syscon_status_off = 0x78,
> > +	.dfx_irq_cause_off = 0x108,
> > +	.dfx_irq_mask_off = 0x10C,
> > +	.dfx_overheat_irq = BIT(20),
> > +	.dfx_server_irq_mask_off = 0x104,
> > +	.dfx_server_irq_en = BIT(1),
> >  };
> >  
> >  static const struct of_device_id armada_thermal_id_table[] = {
> > @@ -592,6 +789,48 @@ static void armada_set_sane_name(struct platform_device *pdev,
> >  	} while (insane_char);
> >  }
> >  
> > +/*
> > + * The IP can manage to trigger interrupts on overheat situation from all the
> > + * sensors. However, the interrupt source changes along with the last selected
> > + * source (ie. the last read sensor), which is an inconsistent behavior. Avoid
> > + * possible glitches by always selecting back only one channel (arbitrarily: the
> > + * first in the DT which has a critical trip point). We also disable sensor
> 
> The DT doesn't seem to *mandate* this. Should it? Does it also mean that
> you can only have a single critical point that will generate an
> interrupt on overheat?
> 

Also, keep in mind that thermal core will shutdown the system when it
sees any zone with temp crossing the first critical trip.


> > + * switch during overheat situations.
> > + */
> > +static int armada_configure_overheat_int(struct armada_thermal_priv *priv,
> > +					 struct thermal_zone_device *tz,
> > +					 int sensor_id)
> > +{
> > +	/* Retrieve the critical trip point to enable the overheat interrupt */
> > +	const struct thermal_trip *trips = of_thermal_get_trip_points(tz);
> > +	int ret;
> > +	int i;
> > +
> > +	if (!trips)
> > +		return -EINVAL;
> > +
> > +	for (i = 0; i < of_thermal_get_ntrips(tz); i++)
> > +		if (trips[i].type == THERMAL_TRIP_CRITICAL)
> > +			break;
> > +
> > +	if (i == of_thermal_get_ntrips(tz))
> > +		return -EINVAL;
> > +
> > +	ret = armada_select_channel(priv, sensor_id);
> > +	if (ret)
> > +		return ret;
> > +
> > +	armada_set_overheat_thresholds(priv,
> > +				       trips[i].temperature,
> > +				       trips[i].hysteresis);
> > +	priv->overheat_sensor = tz;
> > +	priv->interrupt_source = sensor_id;
> > +
> > +	armada_enable_overheat_interrupt(priv);
> > +
> > +	return 0;
> > +}
> > +
> >  static int armada_thermal_probe(struct platform_device *pdev)
> >  {
> >  	struct thermal_zone_device *tz;
> > @@ -599,7 +838,7 @@ static int armada_thermal_probe(struct platform_device *pdev)
> >  	struct armada_drvdata *drvdata;
> >  	const struct of_device_id *match;
> >  	struct armada_thermal_priv *priv;
> > -	int sensor_id;
> > +	int sensor_id, irq;
> >  	int ret;
> >  
> >  	match = of_match_device(armada_thermal_id_table, &pdev->dev);
> > @@ -669,6 +908,23 @@ static int armada_thermal_probe(struct platform_device *pdev)
> >  	drvdata->data.priv = priv;
> >  	platform_set_drvdata(pdev, drvdata);
> >  
> > +	irq = platform_get_irq(pdev, 0);
> > +	if (irq == -EPROBE_DEFER)
> > +		return irq;
> > +
> > +	/* The overheat interrupt feature is not mandatory */
> > +	if (irq >= 0) {
> 
> 0 is not a valid interrupt.
> 
> > +		ret = devm_request_threaded_irq(&pdev->dev, irq,
> > +						armada_overheat_isr,
> > +						armada_overheat_isr_thread,
> > +						0, NULL, priv);
> > +		if (ret) {
> > +			dev_err(&pdev->dev, "Cannot request threaded IRQ %d\n",
> > +				irq);
> > +			return ret;
> > +		}
> > +	}
> > +
> >  	/*
> >  	 * There is one channel for the IC and one per CPU (if any), each
> >  	 * channel has one sensor.
> > @@ -692,8 +948,20 @@ static int armada_thermal_probe(struct platform_device *pdev)
> >  			devm_kfree(&pdev->dev, sensor);
> >  			continue;
> >  		}
> > +
> > +		/*
> > +		 * The first channel that has a critical trip point registered
> > +		 * in the DT will serve as interrupt source. Others possible
> > +		 * critical trip points will simply be ignored by the driver.
> > +		 */
> > +		if (irq >= 0 && !priv->overheat_sensor)
> 
> Same here.
> 
> > +			armada_configure_overheat_int(priv, tz, sensor->id);
> >  	}
> >  
> > +	/* Just complain if no overheat interrupt was set up */
> > +	if (!priv->overheat_sensor)
> > +		dev_warn(&pdev->dev, "Overheat interrupt not available\n");
> > +
> >  	return 0;
> >  }
> >  
> > 
> 
> Thanks,
> 
> 	M.
> -- 
> Jazz is not dead. It just smells funny...



[Index of Archives]     [Device Tree Compilter]     [Device Tree Spec]     [Linux Driver Backports]     [Video for Linux]     [Linux USB Devel]     [Linux PCI Devel]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Yosemite Backpacking]


  Powered by Linux