During tolerance tests that force an HBA to become unresponsive, rmmod hangs resulting in the inability to remove the driver. The lpfc_pci_remove_one_s4 routine attempts to submit a clean up mailbox command via the lpfc_sli4_post_sync_mbox routine, but ends up waiting forever for a mailbox register to set its ready bit. Because the HBA is in an unrecoverable and unresponsive state, the ready bit will never be set. Create a new routine called lpfc_sli4_unrecoverable_port, which checks a port status register's error notification bits. Use the lpfc_sli4_unrecoverable_port routine in ready bit check routines to early return error if port is deemed unrecoverable. Also, when the lpfc_handle_eratt_s4 handler detects an unrecoverable state, call the lpfc_sli4_offline_eratt routine to kick off flushing outstanding I/O. Signed-off-by: Justin Tee <justin.tee@xxxxxxxxxxxx> --- drivers/scsi/lpfc/lpfc_attr.c | 6 ++++++ drivers/scsi/lpfc/lpfc_init.c | 5 ++--- drivers/scsi/lpfc/lpfc_nvme.c | 4 +++- drivers/scsi/lpfc/lpfc_sli.c | 20 +++++++++++++++++++- drivers/scsi/lpfc/lpfc_sli4.h | 19 +++++++++++++++++++ 5 files changed, 49 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 22f2e046e8eb..ddbc54e8bcfd 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1644,6 +1644,12 @@ lpfc_sli4_pdev_status_reg_wait(struct lpfc_hba *phba) !bf_get(lpfc_sliport_status_err, &portstat_reg)) return -EPERM; + /* There is no point to wait if the port is in an unrecoverable + * state. 
+ */ + if (lpfc_sli4_unrecoverable_port(&portstat_reg)) + return -EIO; + /* wait for the SLI port firmware ready after firmware reset */ for (i = 0; i < LPFC_FW_RESET_MAXIMUM_WAIT_10MS_CNT; i++) { msleep(10); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 75737088d011..5b30e71dc926 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -2147,7 +2147,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba) /* fall through for not able to recover */ lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, "3152 Unrecoverable error\n"); - phba->link_state = LPFC_HBA_ERROR; + lpfc_sli4_offline_eratt(phba); break; case LPFC_SLI_INTF_IF_TYPE_1: default: @@ -9566,8 +9566,7 @@ lpfc_sli4_post_status_check(struct lpfc_hba *phba) /* Final checks. The port status should be clean. */ if (lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr, &reg_data.word0) || - (bf_get(lpfc_sliport_status_err, &reg_data) && - !bf_get(lpfc_sliport_status_rn, &reg_data))) { + lpfc_sli4_unrecoverable_port(&reg_data)) { phba->work_status[0] = readl(phba->sli4_hba.u.if_type2. ERR1regaddr); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 152245f7cacc..ae3207e73113 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2265,6 +2265,7 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, } if (!vport->localport || test_bit(HBA_PCI_ERR, &vport->phba->bit_flags) || + phba->link_state == LPFC_HBA_ERROR || vport->load_flag & FC_UNLOADING) return; @@ -2630,7 +2631,8 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) * return values is ignored. The upcall is a courtesy to the * transport. 
*/ - if (vport->load_flag & FC_UNLOADING) + if (vport->load_flag & FC_UNLOADING || + unlikely(vport->phba->link_state == LPFC_HBA_ERROR)) (void)nvme_fc_set_remoteport_devloss(remoteport, 0); ret = nvme_fc_unregister_remoteport(remoteport); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index c8b4632e3dd4..b4917db6e532 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -9895,7 +9895,8 @@ lpfc_sli4_async_mbox_unblock(struct lpfc_hba *phba) * port for twice the regular mailbox command timeout value. * * 0 - no timeout on waiting for bootstrap mailbox register ready. - * MBXERR_ERROR - wait for bootstrap mailbox register timed out. + * MBXERR_ERROR - wait for bootstrap mailbox register timed out or port + * is in an unrecoverable state. **/ static int lpfc_sli4_wait_bmbx_ready(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) @@ -9903,6 +9904,23 @@ lpfc_sli4_wait_bmbx_ready(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) uint32_t db_ready; unsigned long timeout; struct lpfc_register bmbx_reg; + struct lpfc_register portstat_reg = {-1}; + + /* Sanity check - there is no point to wait if the port is in an + * unrecoverable state. 
+ */ + if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >= + LPFC_SLI_INTF_IF_TYPE_2) { + if (lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr, + &portstat_reg.word0) || + lpfc_sli4_unrecoverable_port(&portstat_reg)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3858 Skipping bmbx ready because " + "Port Status x%x\n", + portstat_reg.word0); + return MBXERR_ERROR; + } + } timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, mboxq) * 1000) + jiffies; diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 3b62c4032c31..2a0864e6d7cd 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -1180,3 +1180,22 @@ static inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx) return q->q_pgs[idx / q->entry_cnt_per_pg] + (q->entry_size * (idx % q->entry_cnt_per_pg)); } + +/** + * lpfc_sli4_unrecoverable_port - Check ERR and RN bits in portstat_reg + * @portstat_reg: portstat_reg pointer containing portstat_reg contents + * + * Description: + * Use only for SLI4 interface type-2 or later. If ERR is set && RN is 0, then + * port is deemed unrecoverable. + * + * Returns: + * true - ERR && !RN + * false - otherwise + */ +static inline bool +lpfc_sli4_unrecoverable_port(struct lpfc_register *portstat_reg) +{ + return bf_get(lpfc_sliport_status_err, portstat_reg) && + !bf_get(lpfc_sliport_status_rn, portstat_reg); +} -- 2.38.0