If the modem crashes for any reason, we may not be able to detect it at MHI level (MHI registers not reachable anymore). This patch implements a health-check mechanism to check regularly that device is alive (MHI layer can communicate with). If device is not alive (because a crash or unexpected reset), the recovery procedure is triggered. Tested successfully with Telit FN980m module. Signed-off-by: Loic Poulain <loic.poulain@xxxxxxxxxx> Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@xxxxxxxxxx> Reviewed-by: Hemant Kumar <hemantk@xxxxxxxxxxxxxx> --- drivers/bus/mhi/pci_generic.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index 04be74b..b427d34 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -14,10 +14,13 @@ #include <linux/mhi.h> #include <linux/module.h> #include <linux/pci.h> +#include <linux/timer.h> #include <linux/workqueue.h> #define MHI_PCI_DEFAULT_BAR_NUM 0 +#define HEALTH_CHECK_PERIOD (HZ * 5) + /** * struct mhi_pci_dev_info - MHI PCI device specific information * @config: MHI controller configuration @@ -188,6 +191,7 @@ struct mhi_pci_device { struct mhi_controller mhi_cntrl; struct pci_saved_state *pci_state; struct work_struct recovery_work; + struct timer_list health_check_timer; unsigned long status; }; @@ -325,6 +329,8 @@ static void mhi_pci_recovery_work(struct work_struct *work) dev_warn(&pdev->dev, "device recovery started\n"); + del_timer(&mhi_pdev->health_check_timer); + /* Clean up MHI state */ if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) { mhi_power_down(mhi_cntrl, false); @@ -348,6 +354,7 @@ static void mhi_pci_recovery_work(struct work_struct *work) goto err_unprepare; set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status); + mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD); return; err_unprepare: @@ -357,6 +364,21 @@ static void mhi_pci_recovery_work(struct work_struct *work) dev_err(&pdev->dev, "Recovery failed\n"); } +static void health_check(struct timer_list *t) +{ + struct mhi_pci_device *mhi_pdev = from_timer(mhi_pdev, t, health_check_timer); + struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl; + + if (!mhi_pci_is_alive(mhi_cntrl)) { + dev_err(mhi_cntrl->cntrl_dev, "Device died\n"); + queue_work(system_long_wq, &mhi_pdev->recovery_work); + return; + } + + /* reschedule in two seconds */ + mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD); +} + static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { const struct mhi_pci_dev_info *info = (struct mhi_pci_dev_info *) id->driver_data; @@ -372,6 +394,7 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return -ENOMEM; INIT_WORK(&mhi_pdev->recovery_work, mhi_pci_recovery_work); + timer_setup(&mhi_pdev->health_check_timer, health_check, 0); mhi_cntrl_config = info->config; mhi_cntrl = &mhi_pdev->mhi_cntrl; @@ -424,6 +447,9 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status); + /* start health check */ + mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD); + return 0; err_unprepare: @@ -439,6 +465,7 @@ static void mhi_pci_remove(struct pci_dev *pdev) struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev); struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl; + del_timer(&mhi_pdev->health_check_timer); cancel_work_sync(&mhi_pdev->recovery_work); if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) { @@ -456,6 +483,8 @@ void mhi_pci_reset_prepare(struct pci_dev *pdev) dev_info(&pdev->dev, "reset\n"); + del_timer(&mhi_pdev->health_check_timer); + /* Clean up MHI state */ if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) { mhi_power_down(mhi_cntrl, false); @@ -499,6 +528,7 @@ void mhi_pci_reset_done(struct pci_dev *pdev) } set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status); + mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD); } static pci_ers_result_t mhi_pci_error_detected(struct pci_dev *pdev, @@ -559,6 +589,7 @@ int __maybe_unused mhi_pci_suspend(struct device *dev) struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev); struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl; + del_timer(&mhi_pdev->health_check_timer); cancel_work_sync(&mhi_pdev->recovery_work); /* Transition to M3 state */ @@ -594,6 +625,9 @@ static int __maybe_unused mhi_pci_resume(struct device *dev) goto err_recovery; } + /* Resume health check */ + mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD); + return 0; err_recovery: -- 2.7.4