During driver load, create an HBA hot unplug watchdog thread, "_base_hba_hot_unplug_work". It polls once every second whether the HBA device has been unplugged by reading the vendor field in the IOC's PCI configuration space. If a hot unplug is detected, it terminates all the outstanding IOs, and hence the kernel's PCIe hotplug module (i.e. pciehp) will clear the instances of the hot unplugged PCI device. The functions below start and stop the watchdog: mpt3sas_base_start_hba_unplug_watchdog mpt3sas_base_stop_hba_unplug_watchdog The watchdog thread starts immediately once the IOC becomes operational. Signed-off-by: Suganath Prabu S <suganath-prabu.subramani@xxxxxxxxxxxx> --- drivers/scsi/mpt3sas/mpt3sas_base.c | 92 +++++++++++++++++++++++++++++++++++- drivers/scsi/mpt3sas/mpt3sas_base.h | 6 +++ drivers/scsi/mpt3sas/mpt3sas_scsih.c | 7 +++ 3 files changed, 104 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 8b33670..6c8a30f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -69,6 +69,7 @@ static MPT_CALLBACK mpt_callbacks[MPT_MAX_CALLBACKS]; #define FAULT_POLLING_INTERVAL 1000 /* in milliseconds */ +#define HBA_HOTUNPLUG_POLLING_INTERVAL 1000 /* in milliseconds */ /* maximum controller queue depth */ #define MAX_HBA_QUEUE_DEPTH 30000 @@ -672,6 +673,46 @@ _base_fault_reset_work(struct work_struct *work) spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); } +static void +_base_hba_hot_unplug_work(struct work_struct *work) +{ + struct MPT3SAS_ADAPTER *ioc = + container_of(work, struct MPT3SAS_ADAPTER, + hba_hot_unplug_work.work); + unsigned long flags; + + spin_lock_irqsave(&ioc->hba_hot_unplug_lock, flags); + if (ioc->shost_recovery || ioc->pci_error_recovery) + goto rearm_timer; + + if (mpt3sas_base_pci_device_is_unplugged(ioc)) { + if (ioc->remove_host) { + pr_err(MPT3SAS_FMT + "The IOC seems hot unplugged and the driver is " + "waiting for pciehp 
module to remove the PCIe " + "device instance associated with IOC!!!\n", + ioc->name); + goto rearm_timer; + } + + /* Set remove_host flag here, since kernel will invoke driver's + * .remove() callback function one after the other for all hot + * un-plugged devices, so it may take some time to call + * .remove() function for subsequent hot un-plugged + * PCI devices. + */ + ioc->remove_host = 1; + } + +rearm_timer: + if (ioc->hba_hot_unplug_work_q) + queue_delayed_work(ioc->hba_hot_unplug_work_q, + &ioc->hba_hot_unplug_work, + msecs_to_jiffies(HBA_HOTUNPLUG_POLLING_INTERVAL)); + spin_unlock_irqrestore(&ioc->hba_hot_unplug_lock, flags); +} + + /** * mpt3sas_base_start_watchdog - start the fault_reset_work_q * @ioc: per adapter object @@ -730,6 +771,54 @@ mpt3sas_base_stop_watchdog(struct MPT3SAS_ADAPTER *ioc) } } +void +mpt3sas_base_start_hba_unplug_watchdog(struct MPT3SAS_ADAPTER *ioc) +{ + unsigned long flags; + + if (ioc->hba_hot_unplug_work_q) + return; + + /* Initialize hba hot unplug polling */ + INIT_DELAYED_WORK(&ioc->hba_hot_unplug_work, + _base_hba_hot_unplug_work); + snprintf(ioc->hba_hot_unplug_work_q_name, + sizeof(ioc->hba_hot_unplug_work_q_name), "poll_%s%d_hba_unplug", + ioc->driver_name, ioc->id); + ioc->hba_hot_unplug_work_q = + create_singlethread_workqueue(ioc->hba_hot_unplug_work_q_name); + if (!ioc->hba_hot_unplug_work_q) { + pr_err(MPT3SAS_FMT "%s: failed (line=%d)\n", + ioc->name, __func__, __LINE__); + return; + } + + spin_lock_irqsave(&ioc->hba_hot_unplug_lock, flags); + if (ioc->hba_hot_unplug_work_q) + queue_delayed_work(ioc->hba_hot_unplug_work_q, + &ioc->hba_hot_unplug_work, + msecs_to_jiffies(HBA_HOTUNPLUG_POLLING_INTERVAL)); + spin_unlock_irqrestore(&ioc->hba_hot_unplug_lock, flags); +} + +void +mpt3sas_base_stop_hba_unplug_watchdog(struct MPT3SAS_ADAPTER *ioc) +{ + unsigned long flags; + struct workqueue_struct *wq; + + spin_lock_irqsave(&ioc->hba_hot_unplug_lock, flags); + wq = ioc->hba_hot_unplug_work_q; + 
ioc->hba_hot_unplug_work_q = NULL; + spin_unlock_irqrestore(&ioc->hba_hot_unplug_lock, flags); + + if (wq) { + if (!cancel_delayed_work_sync(&ioc->hba_hot_unplug_work)) + flush_workqueue(wq); + destroy_workqueue(wq); + } +} + /** * mpt3sas_base_fault_info - verbose translation of firmware FAULT code * @ioc: per adapter object @@ -6458,7 +6547,7 @@ _base_make_ioc_operational(struct MPT3SAS_ADAPTER *ioc) } skip_init_reply_post_host_index: - + mpt3sas_base_start_hba_unplug_watchdog(ioc); _base_unmask_interrupts(ioc); if (ioc->hba_mpi_version_belonged != MPI2_VERSION) { @@ -6789,6 +6878,7 @@ mpt3sas_base_detach(struct MPT3SAS_ADAPTER *ioc) __func__)); mpt3sas_base_stop_watchdog(ioc); + mpt3sas_base_stop_hba_unplug_watchdog(ioc); mpt3sas_base_free_resources(ioc); _base_release_memory_pools(ioc); mpt3sas_free_enclosure_list(ioc); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 8ee3ba7..4186bc9 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -1140,8 +1140,11 @@ struct MPT3SAS_ADAPTER { /* fw fault handler */ char fault_reset_work_q_name[20]; + char hba_hot_unplug_work_q_name[20]; struct workqueue_struct *fault_reset_work_q; + struct workqueue_struct *hba_hot_unplug_work_q; struct delayed_work fault_reset_work; + struct delayed_work hba_hot_unplug_work; /* fw event handler */ char firmware_event_name[20]; @@ -1158,6 +1161,7 @@ struct MPT3SAS_ADAPTER { struct mutex reset_in_progress_mutex; spinlock_t ioc_reset_in_progress_lock; + spinlock_t hba_hot_unplug_lock; u8 ioc_link_reset_in_progress; u8 ignore_loginfos; @@ -1482,6 +1486,8 @@ mpt3sas_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc); u8 mpt3sas_base_check_cmd_timeout(struct MPT3SAS_ADAPTER *ioc, u8 status, void *mpi_request, int sz); +void mpt3sas_base_start_hba_unplug_watchdog(struct MPT3SAS_ADAPTER *ioc); +void mpt3sas_base_stop_hba_unplug_watchdog(struct MPT3SAS_ADAPTER *ioc); /* scsih shared API */ struct scsi_cmnd 
*mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, u16 smid); diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index eeee9da..7e0c4ec 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -9828,9 +9828,11 @@ static void scsih_remove(struct pci_dev *pdev) ioc->remove_host = 1; mpt3sas_wait_for_commands_to_complete(ioc); + spin_lock_irqsave(&ioc->hba_hot_unplug_lock, flags); _scsih_flush_running_cmds(ioc); _scsih_fw_event_cleanup_queue(ioc); + spin_unlock_irqrestore(&ioc->hba_hot_unplug_lock, flags); spin_lock_irqsave(&ioc->fw_event_lock, flags); wq = ioc->firmware_event_thread; @@ -10724,6 +10726,7 @@ scsih_suspend(struct pci_dev *pdev, pm_message_t state) pci_power_t device_state; mpt3sas_base_stop_watchdog(ioc); + mpt3sas_base_stop_hba_unplug_watchdog(ioc); flush_scheduled_work(); scsi_block_requests(shost); device_state = pci_choose_state(pdev, state); @@ -10766,6 +10769,7 @@ scsih_resume(struct pci_dev *pdev) mpt3sas_base_hard_reset_handler(ioc, SOFT_RESET); scsi_unblock_requests(shost); mpt3sas_base_start_watchdog(ioc); + mpt3sas_base_start_hba_unplug_watchdog(ioc); return 0; } #endif /* CONFIG_PM */ @@ -10796,12 +10800,14 @@ scsih_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) ioc->pci_error_recovery = 1; scsi_block_requests(ioc->shost); mpt3sas_base_stop_watchdog(ioc); + mpt3sas_base_stop_hba_unplug_watchdog(ioc); mpt3sas_base_free_resources(ioc); return PCI_ERS_RESULT_NEED_RESET; case pci_channel_io_perm_failure: /* Permanent error, prepare for device removal */ ioc->pci_error_recovery = 1; mpt3sas_base_stop_watchdog(ioc); + mpt3sas_base_stop_hba_unplug_watchdog(ioc); _scsih_flush_running_cmds(ioc); return PCI_ERS_RESULT_DISCONNECT; } @@ -10862,6 +10868,7 @@ scsih_pci_resume(struct pci_dev *pdev) pci_cleanup_aer_uncorrect_error_status(pdev); mpt3sas_base_start_watchdog(ioc); + mpt3sas_base_start_hba_unplug_watchdog(ioc); 
scsi_unblock_requests(ioc->shost); } -- 1.8.3.1