On Mon, Dec 2, 2024 at 3:06 PM Heikki Krogerus <heikki.krogerus@xxxxxxxxxxxxxxx> wrote: > > Hi Łukasz, > > On Thu, Nov 28, 2024 at 11:20:35PM +0000, Łukasz Bartosik wrote: > > From: Abhishek Pandit-Subedi <abhishekpandit@xxxxxxxxxxxx> > > > > In a suspend-resume edge case, the OPM is timing out in ucsi_resume and > > the PPM is getting stuck waiting for a command complete ack. Add a write > > timeout recovery task that will get us out of this state. > > Sorry, I did not realise this before, but this is in practice a fix > (or workaround) to the driver that you just introduced in the previous > patch. Please merge it into that. > Hi Heikki, Yes I will merge it. I deliberately sent it as a separate commit for review so that it would be easier to see the changes. > > Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@xxxxxxxxxxxx> > > Signed-off-by: Łukasz Bartosik <ukaszb@xxxxxxxxxxxx> > > --- > > drivers/usb/typec/ucsi/cros_ec_ucsi.c | 88 ++++++++++++++++++++++++++- > > 1 file changed, 87 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/usb/typec/ucsi/cros_ec_ucsi.c b/drivers/usb/typec/ucsi/cros_ec_ucsi.c > > index c588d9a57643..6daf61e7e62a 100644 > > --- a/drivers/usb/typec/ucsi/cros_ec_ucsi.c > > +++ b/drivers/usb/typec/ucsi/cros_ec_ucsi.c > > @@ -7,6 +7,7 @@ > > > > #include <linux/container_of.h> > > #include <linux/dev_printk.h> > > +#include <linux/jiffies.h> > > #include <linux/mod_devicetable.h> > > #include <linux/module.h> > > #include <linux/platform_data/cros_ec_commands.h> > > @@ -29,6 +30,9 @@ > > */ > > #define WRITE_TMO_MS 5000 > > > > +/* Number of times to attempt recovery from a write timeout before giving up. */ > > +#define WRITE_TMO_CTR_MAX 5 > > + > > struct cros_ucsi_data { > > struct device *dev; > > struct ucsi *ucsi; > > @@ -36,6 +40,8 @@ struct cros_ucsi_data { > > struct cros_ec_device *ec; > > struct notifier_block nb; > > struct work_struct work; > > + struct delayed_work write_tmo; > > + int tmo_counter; > > > > struct completion complete; > > unsigned long flags; > > @@ -99,12 +105,43 @@ static int cros_ucsi_async_control(struct ucsi *ucsi, u64 cmd) > > return 0; > > } > > > > +static int cros_ucsi_sync_control(struct ucsi *ucsi, u64 cmd) > > +{ > > + struct cros_ucsi_data *udata = ucsi_get_drvdata(ucsi); > > + int ret; > > + > > + ret = ucsi_sync_control_common(ucsi, cmd); > > + if (ret) > > + goto out; > > + > > + /* Successful write. Cancel any pending recovery work. */ > > + cancel_delayed_work_sync(&udata->write_tmo); > > + > > + return 0; > > +out: > > This label looks a bit unnecessary to me. How about a switch statement? > Good point. I will rework this part of the code to use switch. Thanks, Łukasz > ret = ucsi_sync_control_common(ucsi, cmd); > switch (ret) { > case -EBUSY: > case -ETIMEDOUT: > cancel_delayed_work_sync(&udata->write_tmo); > schedule_delayed_work(&udata->write_tmo, msecs_to_jiffies(WRITE_TMO_MS)); > break; > case 0: > cancel_delayed_work_sync(&udata->write_tmo); > break; > } > > return ret; > > > + /* EC may return -EBUSY if CCI.busy is set. Convert this to a timeout. > > + */ > > + if (ret == -EBUSY) > > + ret = -ETIMEDOUT; > > + > > + /* Schedule recovery attempt when we timeout or tried to send a command > > + * while still busy. > > + */ > > + if (ret == -ETIMEDOUT) { > > + cancel_delayed_work_sync(&udata->write_tmo); > > + schedule_delayed_work(&udata->write_tmo, > > + msecs_to_jiffies(WRITE_TMO_MS)); > > + } > > + > > + return ret; > > +} > > + > > struct ucsi_operations cros_ucsi_ops = { > > .read_version = cros_ucsi_read_version, > > .read_cci = cros_ucsi_read_cci, > > .read_message_in = cros_ucsi_read_message_in, > > .async_control = cros_ucsi_async_control, > > - .sync_control = ucsi_sync_control_common, > > + .sync_control = cros_ucsi_sync_control, > > }; > > > > static void cros_ucsi_work(struct work_struct *work) > > @@ -118,6 +155,54 @@ static void cros_ucsi_work(struct work_struct *work) > > ucsi_notify_common(udata->ucsi, cci); > > } > > > > +static void cros_ucsi_write_timeout(struct work_struct *work) > > +{ > > + struct cros_ucsi_data *udata = > > + container_of(work, struct cros_ucsi_data, write_tmo.work); > > + u32 cci; > > + u64 cmd; > > + > > + if (cros_ucsi_read(udata->ucsi, UCSI_CCI, &cci, sizeof(cci))) { > > + dev_err(udata->dev, > > + "Reading CCI failed; no write timeout recovery possible."); > > + return; > > + } > > + > > + if (cci & UCSI_CCI_BUSY) { > > + udata->tmo_counter++; > > + > > + if (udata->tmo_counter <= WRITE_TMO_CTR_MAX) > > + schedule_delayed_work(&udata->write_tmo, > > + msecs_to_jiffies(WRITE_TMO_MS)); > > + else > > + dev_err(udata->dev, > > + "PPM unresponsive - too many write timeouts."); > > + > > + return; > > + } > > + > > + /* No longer busy means we can reset our timeout counter. */ > > + udata->tmo_counter = 0; > > + > > + /* Need to ack previous command which may have timed out. */ > > + if (cci & UCSI_CCI_COMMAND_COMPLETE) { > > + cmd = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE; > > + cros_ucsi_async_control(udata->ucsi, &cmd); > > + > > + /* Check again after a few seconds that the system has > > + * recovered to make sure our async write above was successful. > > + */ > > + schedule_delayed_work(&udata->write_tmo, > > + msecs_to_jiffies(WRITE_TMO_MS)); > > + return; > > + } > > + > > + /* We recovered from a previous timeout. Treat this as a recovery from > > + * suspend and call resume. > > + */ > > + ucsi_resume(udata->ucsi); > > +} > > + > > static int cros_ucsi_event(struct notifier_block *nb, > > unsigned long host_event, void *_notify) > > { > > @@ -162,6 +247,7 @@ static int cros_ucsi_probe(struct platform_device *pdev) > > platform_set_drvdata(pdev, udata); > > > > INIT_WORK(&udata->work, cros_ucsi_work); > > + INIT_DELAYED_WORK(&udata->write_tmo, cros_ucsi_write_timeout); > > init_completion(&udata->complete); > > > > udata->ucsi = ucsi_create(dev, &cros_ucsi_ops); > > thanks, > > -- > heikki