Re: [PATCH] iio: pressure: dps310: Reset chip if MEAS_CFG is corrupt

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 18 May 2022 09:07:08 -0500
Eddie James <eajames@xxxxxxxxxxxxx> wrote:

> On 5/14/22 10:02, Jonathan Cameron wrote:
> > On Wed, 11 May 2022 14:27:24 -0500
> > Eddie James <eajames@xxxxxxxxxxxxx> wrote:
> >  
> >> Corruption of the MEAS_CFG register has been observed soon after
> >> system boot. In order to recover this scenario, check MEAS_CFG if
> >> measurement isn't ready, and if it's incorrect, reset the DPS310
> >> and write all the necessary registers.
> >>
> >> Signed-off-by: Eddie James <eajames@xxxxxxxxxxxxx>  
> > It's a large patch, so not an ideal 'fix' to backport, but
> > if we need to for platforms to work we should do so.
> >
> > Hence, please add a Fixes tag (which I'm guessing will be the driver
> > being added in the first place).
> >
> > Whilst tidying up might be nice, we shouldn't do refactoring that's not
> > strictly necessary in a fix patch.  Hence I'd prefer this as a two patch
> > series. Refactor with no functional changes, then the actual change.  
> 
> 
> Sure thing.
> 
> 
> >
> > A couple of minor queries inline,
> >
> > Thanks,
> >
> > Jonathan
> >
> >
> >  
> >> ---
> >>   drivers/iio/pressure/dps310.c | 280 +++++++++++++++++++++-------------
> >>   1 file changed, 173 insertions(+), 107 deletions(-)
> >>
> >> diff --git a/drivers/iio/pressure/dps310.c b/drivers/iio/pressure/dps310.c
> >> index 36fb7ae0d0a9..39f84614f44e 100644
> >> --- a/drivers/iio/pressure/dps310.c
> >> +++ b/drivers/iio/pressure/dps310.c
> >> @@ -159,6 +159,106 @@ static int dps310_get_coefs(struct dps310_data *data)
> >>   	return 0;
> >>   }
> >>   
> >> +/*
> >> + * Some verions of chip will read temperatures in the ~60C range when
> >> + * its actually ~20C. This is the manufacturer recommended workaround
> >> + * to correct the issue. The registers used below are undocumented.
> >> + */
> >> +static int dps310_temp_workaround(struct dps310_data *data)
> >> +{
> >> +	int rc;
> >> +	int reg;
> >> +
> >> +	rc = regmap_read(data->regmap, 0x32, &reg);
> >> +	if (rc < 0)
> >> +		return rc;
> >> +
> >> +	/*
> >> +	 * If bit 1 is set then the device is okay, and the workaround does not
> >> +	 * need to be applied
> >> +	 */
> >> +	if (reg & BIT(1))
> >> +		return 0;
> >> +
> >> +	rc = regmap_write(data->regmap, 0x0e, 0xA5);
> >> +	if (rc < 0)
> >> +		return rc;
> >> +
> >> +	rc = regmap_write(data->regmap, 0x0f, 0x96);
> >> +	if (rc < 0)
> >> +		return rc;
> >> +
> >> +	rc = regmap_write(data->regmap, 0x62, 0x02);
> >> +	if (rc < 0)
> >> +		return rc;
> >> +
> >> +	rc = regmap_write(data->regmap, 0x0e, 0x00);
> >> +	if (rc < 0)
> >> +		return rc;
> >> +
> >> +	return regmap_write(data->regmap, 0x0f, 0x00);
> >> +}
> >> +
> >> +static int dps310_startup(struct dps310_data *data)
> >> +{
> >> +	int rc;
> >> +	int ready;
> >> +
> >> +	/*
> >> +	 * Set up pressure sensor in single sample, one measurement per second
> >> +	 * mode
> >> +	 */
> >> +	rc = regmap_write(data->regmap, DPS310_PRS_CFG, 0);
> >> +	if (rc < 0)
> >> +		return rc;
> >> +
> >> +	/*
> >> +	 * Set up external (MEMS) temperature sensor in single sample, one
> >> +	 * measurement per second mode
> >> +	 */
> >> +	rc = regmap_write(data->regmap, DPS310_TMP_CFG, DPS310_TMP_EXT);
> >> +	if (rc < 0)
> >> +		return rc;
> >> +
> >> +	/* Temp and pressure shifts are disabled when PRC <= 8 */
> >> +	rc = regmap_write_bits(data->regmap, DPS310_CFG_REG,
> >> +			       DPS310_PRS_SHIFT_EN | DPS310_TMP_SHIFT_EN, 0);
> >> +	if (rc < 0)
> >> +		return rc;
> >> +
> >> +	/* MEAS_CFG doesn't update correctly unless first written with 0 */
> >> +	rc = regmap_write_bits(data->regmap, DPS310_MEAS_CFG,
> >> +			       DPS310_MEAS_CTRL_BITS, 0);
> >> +	if (rc < 0)
> >> +		return rc;
> >> +
> >> +	/* Turn on temperature and pressure measurement in the background */
> >> +	rc = regmap_write_bits(data->regmap, DPS310_MEAS_CFG,
> >> +			       DPS310_MEAS_CTRL_BITS, DPS310_PRS_EN |
> >> +			       DPS310_TEMP_EN | DPS310_BACKGROUND);
> >> +	if (rc < 0)
> >> +		return rc;
> >> +
> >> +	/*
> >> +	 * Calibration coefficients required for reporting temperature.
> >> +	 * They are available 40ms after the device has started
> >> +	 */
> >> +	rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready,
> >> +				      ready & DPS310_COEF_RDY, 10000, 40000);
> >> +	if (rc < 0)
> >> +		return rc;
> >> +
> >> +	rc = dps310_get_coefs(data);
> >> +	if (rc < 0)
> >> +		return rc;
> >> +
> >> +	rc = dps310_temp_workaround(data);
> >> +	if (rc < 0)
> >> +		return rc;
> >> +
> >> +	return 0;
> >> +}
> >> +
> >>   static int dps310_get_pres_precision(struct dps310_data *data)
> >>   {
> >>   	int rc;
> >> @@ -297,6 +397,38 @@ static int dps310_get_temp_k(struct dps310_data *data)
> >>   	return scale_factors[ilog2(rc)];
> >>   }
> >>   
> >> +/* Called with lock held */
> >> +static int dps310_verify_meas_cfg(struct dps310_data *data, int ready_bit)  
> > Hmm. I'm not immediately coming up with a better name, but it does
> > seem odd that a verify function would do a reset.  
> 
> 
> Sure, I'll try and think of a better name.
> 
> 
> >  
> >> +{
> >> +	int en = DPS310_PRS_EN | DPS310_TEMP_EN | DPS310_BACKGROUND;
> >> +	int meas_cfg;
> >> +	int rc = regmap_read(data->regmap, DPS310_MEAS_CFG, &meas_cfg);
> >> +
> >> +	if (rc < 0)
> >> +		return rc;
> >> +
> >> +	if (meas_cfg & ready_bit)
> >> +		return 0;
> >> +
> >> +	if ((meas_cfg & en) != en) {
> >> +		/* DPS310 register state corrupt, better start from scratch */
> >> +		rc = regmap_write(data->regmap, DPS310_RESET, DPS310_RESET_MAGIC);
> >> +		if (rc < 0)
> >> +			return rc;
> >> +
> >> +		/* Wait for device chip access: 2.5ms in specification */
> >> +		usleep_range(2500, 12000);
> >> +		rc = dps310_startup(data);
> >> +		if (rc)
> >> +			return rc;
> >> +
> >> +		dev_info(&data->client->dev,
> >> +			 "recovered from corrupted MEAS_CFG=%02x\n", meas_cfg);
> >> +	}
> >> +
> >> +	return 1;
> >> +}
> >> +
> >>   static int dps310_read_pres_raw(struct dps310_data *data)
> >>   {
> >>   	int rc;
> >> @@ -309,15 +441,25 @@ static int dps310_read_pres_raw(struct dps310_data *data)
> >>   	if (mutex_lock_interruptible(&data->lock))
> >>   		return -EINTR;
> >>   
> >> -	rate = dps310_get_pres_samp_freq(data);
> >> -	timeout = DPS310_POLL_TIMEOUT_US(rate);
> >> -
> >> -	/* Poll for sensor readiness; base the timeout upon the sample rate. */
> >> -	rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready,
> >> -				      ready & DPS310_PRS_RDY,
> >> -				      DPS310_POLL_SLEEP_US(timeout), timeout);
> >> -	if (rc)
> >> -		goto done;
> >> +	rc = dps310_verify_meas_cfg(data, DPS310_PRS_RDY);
> >> +	if (rc) {  
> > So at this point we potentially reset. Is there a race condition against
> > the device register state corrupting again?  If so I guess we are relying
> > on the timeout and userspace trying again.  Maybe worth adding a comment
> > if that is the case.  
> 
> 
> Well, I don't know what causes the corruption at this point. The lock is 
> held, so nothing else should touch the device, so I'm assuming it won't 
> get corrupted. And assuming the reset works, then the sensor measurement 
> should return valid data now, so any problem should be invisible to 
> userspace (except this read will take a long time).

Fair enough - I was guessing we were looking at noise / brownout or
similar causing a loss of register value.  If we think it's related
to actual traffic (or don't know :) then I'm fine with this unless
we get reports of more problems even with this 'hack' in place.

Thanks,

Jonathan

> 
> 
> Thanks for the review.
> 
> Eddie
> 
> 
> >  
> >> +		if (rc < 0)
> >> +			goto done;
> >> +
> >> +		rate = dps310_get_pres_samp_freq(data);
> >> +		timeout = DPS310_POLL_TIMEOUT_US(rate);
> >> +
> >> +		/*
> >> +		 * Poll for sensor readiness; base the timeout upon the sample
> >> +		 * rate.
> >> +		 */
> >> +		rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG,
> >> +					      ready, ready & DPS310_PRS_RDY,
> >> +					      DPS310_POLL_SLEEP_US(timeout),
> >> +					      timeout);
> >> +		if (rc)
> >> +			goto done;
> >> +	}
> >>   
> >>   	rc = regmap_bulk_read(data->regmap, DPS310_PRS_BASE, val, sizeof(val));
> >>   	if (rc < 0)
> >> @@ -358,15 +500,25 @@ static int dps310_read_temp_raw(struct dps310_data *data)
> >>   	if (mutex_lock_interruptible(&data->lock))
> >>   		return -EINTR;
> >>   
> >> -	rate = dps310_get_temp_samp_freq(data);
> >> -	timeout = DPS310_POLL_TIMEOUT_US(rate);
> >> -
> >> -	/* Poll for sensor readiness; base the timeout upon the sample rate. */
> >> -	rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready,
> >> -				      ready & DPS310_TMP_RDY,
> >> -				      DPS310_POLL_SLEEP_US(timeout), timeout);
> >> -	if (rc < 0)
> >> -		goto done;
> >> +	rc = dps310_verify_meas_cfg(data, DPS310_TMP_RDY);
> >> +	if (rc) {
> >> +		if (rc < 0)
> >> +			goto done;
> >> +
> >> +		rate = dps310_get_temp_samp_freq(data);
> >> +		timeout = DPS310_POLL_TIMEOUT_US(rate);
> >> +
> >> +		/*
> >> +		 * Poll for sensor readiness; base the timeout upon the sample
> >> +		 * rate.
> >> +		 */
> >> +		rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG,
> >> +					      ready, ready & DPS310_TMP_RDY,
> >> +					      DPS310_POLL_SLEEP_US(timeout),
> >> +					      timeout);
> >> +		if (rc < 0)
> >> +			goto done;
> >> +	}
> >>   
> >>   	rc = dps310_read_temp_ready(data);
> >>   
> >> @@ -677,52 +829,12 @@ static const struct iio_info dps310_info = {
> >>   	.write_raw = dps310_write_raw,
> >>   };
> >>   
> >> -/*
> >> - * Some verions of chip will read temperatures in the ~60C range when
> >> - * its actually ~20C. This is the manufacturer recommended workaround
> >> - * to correct the issue. The registers used below are undocumented.
> >> - */
> >> -static int dps310_temp_workaround(struct dps310_data *data)
> >> -{
> >> -	int rc;
> >> -	int reg;
> >> -
> >> -	rc = regmap_read(data->regmap, 0x32, &reg);
> >> -	if (rc < 0)
> >> -		return rc;
> >> -
> >> -	/*
> >> -	 * If bit 1 is set then the device is okay, and the workaround does not
> >> -	 * need to be applied
> >> -	 */
> >> -	if (reg & BIT(1))
> >> -		return 0;
> >> -
> >> -	rc = regmap_write(data->regmap, 0x0e, 0xA5);
> >> -	if (rc < 0)
> >> -		return rc;
> >> -
> >> -	rc = regmap_write(data->regmap, 0x0f, 0x96);
> >> -	if (rc < 0)
> >> -		return rc;
> >> -
> >> -	rc = regmap_write(data->regmap, 0x62, 0x02);
> >> -	if (rc < 0)
> >> -		return rc;
> >> -
> >> -	rc = regmap_write(data->regmap, 0x0e, 0x00);
> >> -	if (rc < 0)
> >> -		return rc;
> >> -
> >> -	return regmap_write(data->regmap, 0x0f, 0x00);
> >> -}
> >> -
> >>   static int dps310_probe(struct i2c_client *client,
> >>   			const struct i2c_device_id *id)
> >>   {
> >>   	struct dps310_data *data;
> >>   	struct iio_dev *iio;
> >> -	int rc, ready;
> >> +	int rc;
> >>   
> >>   	iio = devm_iio_device_alloc(&client->dev,  sizeof(*data));
> >>   	if (!iio)
> >> @@ -747,54 +859,8 @@ static int dps310_probe(struct i2c_client *client,
> >>   	if (rc)
> >>   		return rc;
> >>   
> >> -	/*
> >> -	 * Set up pressure sensor in single sample, one measurement per second
> >> -	 * mode
> >> -	 */
> >> -	rc = regmap_write(data->regmap, DPS310_PRS_CFG, 0);  
> > Lack of error checking in original code is odd. Deliberate or not is the
> > question... (probably not deliberate as rc = is there).
> >  
> >> -
> >> -	/*
> >> -	 * Set up external (MEMS) temperature sensor in single sample, one
> >> -	 * measurement per second mode
> >> -	 */
> >> -	rc = regmap_write(data->regmap, DPS310_TMP_CFG, DPS310_TMP_EXT);
> >> -	if (rc < 0)
> >> -		return rc;
> >> -
> >> -	/* Temp and pressure shifts are disabled when PRC <= 8 */
> >> -	rc = regmap_write_bits(data->regmap, DPS310_CFG_REG,
> >> -			       DPS310_PRS_SHIFT_EN | DPS310_TMP_SHIFT_EN, 0);
> >> -	if (rc < 0)
> >> -		return rc;
> >> -
> >> -	/* MEAS_CFG doesn't update correctly unless first written with 0 */
> >> -	rc = regmap_write_bits(data->regmap, DPS310_MEAS_CFG,
> >> -			       DPS310_MEAS_CTRL_BITS, 0);
> >> -	if (rc < 0)
> >> -		return rc;
> >> -
> >> -	/* Turn on temperature and pressure measurement in the background */
> >> -	rc = regmap_write_bits(data->regmap, DPS310_MEAS_CFG,
> >> -			       DPS310_MEAS_CTRL_BITS, DPS310_PRS_EN |
> >> -			       DPS310_TEMP_EN | DPS310_BACKGROUND);
> >> -	if (rc < 0)
> >> -		return rc;
> >> -
> >> -	/*
> >> -	 * Calibration coefficients required for reporting temperature.
> >> -	 * They are available 40ms after the device has started
> >> -	 */
> >> -	rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready,
> >> -				      ready & DPS310_COEF_RDY, 10000, 40000);
> >> -	if (rc < 0)
> >> -		return rc;
> >> -
> >> -	rc = dps310_get_coefs(data);
> >> -	if (rc < 0)
> >> -		return rc;
> >> -
> >> -	rc = dps310_temp_workaround(data);
> >> -	if (rc < 0)
> >> +	rc = dps310_startup(data);
> >> +	if (rc)
> >>   		return rc;
> >>   
> >>   	rc = devm_iio_device_register(&client->dev, iio);  




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Input]     [Linux Kernel]     [Linux SCSI]     [X.org]

  Powered by Linux