Re: [PATCH v3 8/9] mhi: pci_generic: Add health-check

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Mani,

On 11/27/20 9:59 PM, Manivannan Sadhasivam wrote:
On Thu, Nov 26, 2020 at 04:29:06PM +0100, Loic Poulain wrote:
If the modem crashes for any reason, we may not be able to detect
it at MHI level (MHI registers not reachable anymore).

This patch implements a health-check mechanism to check regularly
that device is alive (MHI layer can communicate with). If device
is not alive (because a crash or unexpected reset), the recovery
procedure is triggered.

Tested successfully with Telit FN980m module.

Signed-off-by: Loic Poulain <loic.poulain@xxxxxxxxxx>

Bhaumik, Hemant, does this looks reasonable to you? Or we can do a
better job in the MHI stack. To me this is not a specific usecase for
Telit.

As far as i understood the change, MHI is unaware of this because this check is for underlying transport e.g. pci. This change looks good to me. Please apply this patch.

Thanks,
Hemant

If you guys plan to implement it later, I can just apply this patch in
the meantime as it looks good to me.

Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@xxxxxxxxxx>

Thanks,
Mani

---
  drivers/bus/mhi/pci_generic.c | 34 ++++++++++++++++++++++++++++++++++
  1 file changed, 34 insertions(+)

diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c
index 3ac5cd2..26c2dfa 100644
--- a/drivers/bus/mhi/pci_generic.c
+++ b/drivers/bus/mhi/pci_generic.c
@@ -14,12 +14,15 @@
  #include <linux/mhi.h>
  #include <linux/module.h>
  #include <linux/pci.h>
+#include <linux/timer.h>
  #include <linux/workqueue.h>
#define MHI_PCI_DEFAULT_BAR_NUM 0 #define DEV_RESET_REG (0xB0) +#define HEALTH_CHECK_PERIOD (HZ * 5)
+
  /**
   * struct mhi_pci_dev_info - MHI PCI device specific information
   * @config: MHI controller configuration
@@ -190,6 +193,7 @@ struct mhi_pci_device {
  	struct mhi_controller mhi_cntrl;
  	struct pci_saved_state *pci_state;
  	struct work_struct recovery_work;
+	struct timer_list health_check_timer;
  	unsigned long status;
  };
@@ -332,6 +336,8 @@ static void mhi_pci_recovery_work(struct work_struct *work) dev_warn(&pdev->dev, "device recovery started\n"); + del_timer(&mhi_pdev->health_check_timer);
+
  	/* Clean up MHI state */
  	if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
  		mhi_power_down(mhi_cntrl, false);
@@ -355,6 +361,7 @@ static void mhi_pci_recovery_work(struct work_struct *work)
  		goto err_unprepare;
set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
+	mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
  	return;
err_unprepare:
@@ -364,6 +371,21 @@ static void mhi_pci_recovery_work(struct work_struct *work)
  		dev_err(&pdev->dev, "Recovery failed\n");
  }
+static void health_check(struct timer_list *t)
+{
+	struct mhi_pci_device *mhi_pdev = from_timer(mhi_pdev, t, health_check_timer);
+	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
+	if (!mhi_pci_is_alive(mhi_cntrl)) {
+		dev_err(mhi_cntrl->cntrl_dev, "Device died\n");
+		queue_work(system_long_wq, &mhi_pdev->recovery_work);
+		return;
+	}
+
+	/* reschedule in two seconds */
+	mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+}
+
  static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  {
  	const struct mhi_pci_dev_info *info = (struct mhi_pci_dev_info *) id->driver_data;
@@ -379,6 +401,7 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  		return -ENOMEM;
INIT_WORK(&mhi_pdev->recovery_work, mhi_pci_recovery_work);
+	timer_setup(&mhi_pdev->health_check_timer, health_check, 0);
mhi_cntrl_config = info->config;
  	mhi_cntrl = &mhi_pdev->mhi_cntrl;
@@ -431,6 +454,9 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status); + /* start health check */
+	mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+
  	return 0;
err_unprepare:
@@ -446,6 +472,7 @@ static void mhi_pci_remove(struct pci_dev *pdev)
  	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
  	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+ del_timer(&mhi_pdev->health_check_timer);
  	cancel_work_sync(&mhi_pdev->recovery_work);
if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
@@ -466,6 +493,8 @@ void mhi_pci_reset_prepare(struct pci_dev *pdev)
dev_info(&pdev->dev, "reset\n"); + del_timer(&mhi_pdev->health_check_timer);
+
  	/* Clean up MHI state */
  	if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
  		mhi_power_down(mhi_cntrl, false);
@@ -509,6 +538,7 @@ void mhi_pci_reset_done(struct pci_dev *pdev)
  	}
set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
+	mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
  }
static pci_ers_result_t mhi_pci_error_detected(struct pci_dev *pdev,
@@ -569,6 +599,7 @@ int  __maybe_unused mhi_pci_suspend(struct device *dev)
  	struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
  	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+ del_timer(&mhi_pdev->health_check_timer);
  	cancel_work_sync(&mhi_pdev->recovery_work);
/* Transition to M3 state */
@@ -604,6 +635,9 @@ static int __maybe_unused mhi_pci_resume(struct device *dev)
  		goto err_recovery;
  	}
+ /* Resume health check */
+	mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+
  	return 0;
err_recovery:
--
2.7.4


--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [Linux for Sparc]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux