Hi Łukasz, On Thu, Nov 28, 2024 at 11:20:35PM +0000, Łukasz Bartosik wrote: > From: Abhishek Pandit-Subedi <abhishekpandit@xxxxxxxxxxxx> > > In a suspend-resume edge case, the OPM is timing out in ucsi_resume and > the PPM is getting stuck waiting for a command complete ack. Add a write > timeout recovery task that will get us out of this state. Sorry, I did not realise this before, but this is in practice a fix (or workaround) to the driver that you just introduced in the previous patch. Please merge it into that. > Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@xxxxxxxxxxxx> > Signed-off-by: Łukasz Bartosik <ukaszb@xxxxxxxxxxxx> > --- > drivers/usb/typec/ucsi/cros_ec_ucsi.c | 88 ++++++++++++++++++++++++++- > 1 file changed, 87 insertions(+), 1 deletion(-) > > diff --git a/drivers/usb/typec/ucsi/cros_ec_ucsi.c b/drivers/usb/typec/ucsi/cros_ec_ucsi.c > index c588d9a57643..6daf61e7e62a 100644 > --- a/drivers/usb/typec/ucsi/cros_ec_ucsi.c > +++ b/drivers/usb/typec/ucsi/cros_ec_ucsi.c > @@ -7,6 +7,7 @@ > > #include <linux/container_of.h> > #include <linux/dev_printk.h> > +#include <linux/jiffies.h> > #include <linux/mod_devicetable.h> > #include <linux/module.h> > #include <linux/platform_data/cros_ec_commands.h> > @@ -29,6 +30,9 @@ > */ > #define WRITE_TMO_MS 5000 > > +/* Number of times to attempt recovery from a write timeout before giving up. */ > +#define WRITE_TMO_CTR_MAX 5 > + > struct cros_ucsi_data { > struct device *dev; > struct ucsi *ucsi; > @@ -36,6 +40,8 @@ struct cros_ucsi_data { > struct cros_ec_device *ec; > struct notifier_block nb; > struct work_struct work; > + struct delayed_work write_tmo; > + int tmo_counter; > > struct completion complete; > unsigned long flags; > @@ -99,12 +105,43 @@ static int cros_ucsi_async_control(struct ucsi *ucsi, u64 cmd) > return 0; > } > > +static int cros_ucsi_sync_control(struct ucsi *ucsi, u64 cmd) > +{ > + struct cros_ucsi_data *udata = ucsi_get_drvdata(ucsi); > + int ret; > + > + ret = ucsi_sync_control_common(ucsi, cmd); > + if (ret) > + goto out; > + > + /* Successful write. Cancel any pending recovery work. */ > + cancel_delayed_work_sync(&udata->write_tmo); > + > + return 0; > +out: This label looks a bit unnecessary to me. How about a switch statement? ret = ucsi_sync_control_common(ucsi, cmd); switch (ret) { case -EBUSY: case -ETIMEDOUT: cancel_delayed_work_sync(&udata->write_tmo); schedule_delayed_work(&udata->write_tmo, msecs_to_jiffies(WRITE_TMO_MS)); break; case 0: cancel_delayed_work_sync(&udata->write_tmo); break; } return ret; > + /* EC may return -EBUSY if CCI.busy is set. Convert this to a timeout. > + */ > + if (ret == -EBUSY) > + ret = -ETIMEDOUT; > + > + /* Schedule recovery attempt when we timeout or tried to send a command > + * while still busy. > + */ > + if (ret == -ETIMEDOUT) { > + cancel_delayed_work_sync(&udata->write_tmo); > + schedule_delayed_work(&udata->write_tmo, > + msecs_to_jiffies(WRITE_TMO_MS)); > + } > + > + return ret; > +} > + > struct ucsi_operations cros_ucsi_ops = { > .read_version = cros_ucsi_read_version, > .read_cci = cros_ucsi_read_cci, > .read_message_in = cros_ucsi_read_message_in, > .async_control = cros_ucsi_async_control, > - .sync_control = ucsi_sync_control_common, > + .sync_control = cros_ucsi_sync_control, > }; > > static void cros_ucsi_work(struct work_struct *work) > @@ -118,6 +155,54 @@ static void cros_ucsi_work(struct work_struct *work) > ucsi_notify_common(udata->ucsi, cci); > } > > +static void cros_ucsi_write_timeout(struct work_struct *work) > +{ > + struct cros_ucsi_data *udata = > + container_of(work, struct cros_ucsi_data, write_tmo.work); > + u32 cci; > + u64 cmd; > + > + if (cros_ucsi_read(udata->ucsi, UCSI_CCI, &cci, sizeof(cci))) { > + dev_err(udata->dev, > + "Reading CCI failed; no write timeout recovery possible."); > + return; > + } > + > + if (cci & UCSI_CCI_BUSY) { > + udata->tmo_counter++; > + > + if (udata->tmo_counter <= WRITE_TMO_CTR_MAX) > + schedule_delayed_work(&udata->write_tmo, > + msecs_to_jiffies(WRITE_TMO_MS)); > + else > + dev_err(udata->dev, > + "PPM unresponsive - too many write timeouts."); > + > + return; > + } > + > + /* No longer busy means we can reset our timeout counter. */ > + udata->tmo_counter = 0; > + > + /* Need to ack previous command which may have timed out. */ > + if (cci & UCSI_CCI_COMMAND_COMPLETE) { > + cmd = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE; > + cros_ucsi_async_control(udata->ucsi, &cmd); > + > + /* Check again after a few seconds that the system has > + * recovered to make sure our async write above was successful. > + */ > + schedule_delayed_work(&udata->write_tmo, > + msecs_to_jiffies(WRITE_TMO_MS)); > + return; > + } > + > + /* We recovered from a previous timeout. Treat this as a recovery from > + * suspend and call resume. > + */ > + ucsi_resume(udata->ucsi); > +} > + > static int cros_ucsi_event(struct notifier_block *nb, > unsigned long host_event, void *_notify) > { > @@ -162,6 +247,7 @@ static int cros_ucsi_probe(struct platform_device *pdev) > platform_set_drvdata(pdev, udata); > > INIT_WORK(&udata->work, cros_ucsi_work); > + INIT_DELAYED_WORK(&udata->write_tmo, cros_ucsi_write_timeout); > init_completion(&udata->complete); > > udata->ucsi = ucsi_create(dev, &cros_ucsi_ops); thanks, -- heikki