On Mon, Dec 14, 2020 at 03:25:33PM +0100, Loic Poulain wrote: > Add support for system wide suspend/resume. During suspend, MHI > device controller must be put in M3 state and PCI bus in D3 state. > > Add a recovery procedure allowing to reinitialize the device in case > of error during resume steps, which can happen if device loses power > (and so its context) while system suspend. > > Signed-off-by: Loic Poulain <loic.poulain@xxxxxxxxxx> > Reviewed-by Hemant Kumar <hemantk@xxxxxxxxxxxxxx> > --- > drivers/bus/mhi/pci_generic.c | 102 ++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 102 insertions(+) > > diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c > index 2521cd4..0936701 100644 > --- a/drivers/bus/mhi/pci_generic.c > +++ b/drivers/bus/mhi/pci_generic.c > @@ -13,6 +13,7 @@ > #include <linux/mhi.h> > #include <linux/module.h> > #include <linux/pci.h> > +#include <linux/workqueue.h> > > #define MHI_PCI_DEFAULT_BAR_NUM 0 > > @@ -186,6 +187,7 @@ enum mhi_pci_device_status { > struct mhi_pci_device { > struct mhi_controller mhi_cntrl; > struct pci_saved_state *pci_state; > + struct work_struct recovery_work; > unsigned long status; > }; > > @@ -313,6 +315,48 @@ static void mhi_pci_runtime_put(struct mhi_controller *mhi_cntrl) > /* no PM for now */ > } > > +static void mhi_pci_recovery_work(struct work_struct *work) > +{ > + struct mhi_pci_device *mhi_pdev = container_of(work, struct mhi_pci_device, > + recovery_work); > + struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl; > + struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev); > + int err; > + > + dev_warn(&pdev->dev, "device recovery started\n"); > + > + /* Clean up MHI state */ > + if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) { > + mhi_power_down(mhi_cntrl, false); > + mhi_unprepare_after_power_down(mhi_cntrl); > + } > + > + /* Check if we can recover without full reset */ > + pci_set_power_state(pdev, PCI_D0); > + pci_load_saved_state(pdev, 
mhi_pdev->pci_state); > + pci_restore_state(pdev); These PCI state settings seem redundant with resume(). In this function you should first check whether MHI is alive; if it is, do the power up, otherwise just exit. > + > + if (!mhi_pci_is_alive(mhi_cntrl)) > + goto err_try_reset; > + > + err = mhi_prepare_for_power_up(mhi_cntrl); > + if (err) > + goto err_try_reset; > + > + err = mhi_sync_power_up(mhi_cntrl); > + if (err) > + goto err_unprepare; Add a debug log for recovery success. > + > + set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status); > + return; > + > +err_unprepare: > + mhi_unprepare_after_power_down(mhi_cntrl); > +err_try_reset: > + if (pci_reset_function(pdev)) So if the device recovers, who will make sure the MHI controller is reinitialized? That's why I think we should convey the recovery result to the PM core. I don't think using a workqueue here is a good idea. > + dev_err(&pdev->dev, "Recovery failed\n"); > +} > + > static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > { > const struct mhi_pci_dev_info *info = (struct mhi_pci_dev_info *) id->driver_data; > @@ -327,6 +371,8 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > if (!mhi_pdev) > return -ENOMEM; > > + INIT_WORK(&mhi_pdev->recovery_work, mhi_pci_recovery_work); > + > mhi_cntrl_config = info->config; > mhi_cntrl = &mhi_pdev->mhi_cntrl; > > @@ -391,6 +437,8 @@ static void mhi_pci_remove(struct pci_dev *pdev) > struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev); > struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl; > > + cancel_work_sync(&mhi_pdev->recovery_work); > + > if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) { > mhi_power_down(mhi_cntrl, true); > mhi_unprepare_after_power_down(mhi_cntrl); > @@ -456,12 +504,66 @@ static const struct pci_error_handlers mhi_pci_err_handler = { > .reset_done = mhi_pci_reset_done, > }; > > +static int __maybe_unused mhi_pci_suspend(struct device *dev) > +{ > + struct pci_dev *pdev 
= to_pci_dev(dev); > + struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev); > + struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl; > + > + cancel_work_sync(&mhi_pdev->recovery_work); > + > + /* Transition to M3 state */ > + mhi_pm_suspend(mhi_cntrl); > + > + pci_save_state(pdev); > + pci_disable_device(pdev); > + pci_wake_from_d3(pdev, true); > + pci_set_power_state(pdev, PCI_D3hot); > + > + return 0; > +} > + > +static int __maybe_unused mhi_pci_resume(struct device *dev) > +{ > + struct pci_dev *pdev = to_pci_dev(dev); > + struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev); > + struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl; > + int err; > + > + pci_set_power_state(pdev, PCI_D0); > + pci_restore_state(pdev); > + pci_set_master(pdev); > + > + err = pci_enable_device(pdev); > + if (err) > + goto err_recovery; > + > + /* Exit M3, transition to M0 state */ > + err = mhi_pm_resume(mhi_cntrl); > + if (err) { > + dev_err(&pdev->dev, "failed to resume device: %d\n", err); > + goto err_recovery; > + } > + > + return 0; > + > +err_recovery: > + /* The device may have loose power or crashed, try recovering it */ Did you actually hit this scenario? In the case of power loss or crash, can we access the MHI register space? Thanks, Mani > + queue_work(system_long_wq, &mhi_pdev->recovery_work); > + return 0; > +} > + > +static const struct dev_pm_ops mhi_pci_pm_ops = { > + SET_SYSTEM_SLEEP_PM_OPS(mhi_pci_suspend, mhi_pci_resume) > +}; > + > static struct pci_driver mhi_pci_driver = { > .name = "mhi-pci-generic", > .id_table = mhi_pci_id_table, > .probe = mhi_pci_probe, > .remove = mhi_pci_remove, > .err_handler = &mhi_pci_err_handler, > + .driver.pm = &mhi_pci_pm_ops > }; > module_pci_driver(mhi_pci_driver); > > -- > 2.7.4 >