From: James Smart <jsmart2021@xxxxxxxxx> [ Upstream commit a9978e3978406ef5e35870b10e677cf75a2620b6 ] Mailbox commands sent via ioctl/bsg from user applications may be interrupted from processing by a concurrently triggered PCI function reset. The command will not generate a completion due to the reset. This results in a user application hang waiting for the mailbox command to complete. Resolve by changing the function reset handler to detect that there was an outstanding mailbox command and simulate a mailbox completion. Add some additional debug when a mailbox command times out. Link: https://lore.kernel.org/r/20210707184351.67872-13-jsmart2021@xxxxxxxxx Co-developed-by: Justin Tee <justin.tee@xxxxxxxxxxxx> Signed-off-by: Justin Tee <justin.tee@xxxxxxxxxxxx> Signed-off-by: James Smart <jsmart2021@xxxxxxxxx> Signed-off-by: Martin K. Petersen <martin.petersen@xxxxxxxxxx> Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> --- drivers/scsi/lpfc/lpfc_init.c | 11 ++++++++++- drivers/scsi/lpfc/lpfc_sli.c | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index c4235699a123..221f22a026e5 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1852,6 +1852,7 @@ lpfc_sli4_port_sta_fn_reset(struct lpfc_hba *phba, int mbx_action, { int rc; uint32_t intr_mode; + LPFC_MBOXQ_t *mboxq; if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >= LPFC_SLI_INTF_IF_TYPE_2) { @@ -1871,11 +1872,19 @@ lpfc_sli4_port_sta_fn_reset(struct lpfc_hba *phba, int mbx_action, "Recovery...\n"); /* If we are no wait, the HBA has been reset and is not - * functional, thus we should clear LPFC_SLI_ACTIVE flag. + * functional, thus we should clear + * (LPFC_SLI_ACTIVE | LPFC_SLI_MBOX_ACTIVE) flags. */ if (mbx_action == LPFC_MBX_NO_WAIT) { spin_lock_irq(&phba->hbalock); phba->sli.sli_flag &= ~LPFC_SLI_ACTIVE; + if (phba->sli.mbox_active) { + mboxq = phba->sli.mbox_active; + mboxq->u.mb.mbxStatus = MBX_NOT_FINISHED; + __lpfc_mbox_cmpl_put(phba, mboxq); + phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; + phba->sli.mbox_active = NULL; + } spin_unlock_irq(&phba->hbalock); } diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index f530d8fe7a8c..b6f58843c77a 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -8790,8 +8790,11 @@ static int lpfc_sli4_async_mbox_block(struct lpfc_hba *phba) { struct lpfc_sli *psli = &phba->sli; + LPFC_MBOXQ_t *mboxq; int rc = 0; unsigned long timeout = 0; + u32 sli_flag; + u8 cmd, subsys, opcode; /* Mark the asynchronous mailbox command posting as blocked */ spin_lock_irq(&phba->hbalock); @@ -8809,12 +8812,37 @@ lpfc_sli4_async_mbox_block(struct lpfc_hba *phba) if (timeout) lpfc_sli4_process_missed_mbox_completions(phba); - /* Wait for the outstnading mailbox command to complete */ + /* Wait for the outstanding mailbox command to complete */ while (phba->sli.mbox_active) { /* Check active mailbox complete status every 2ms */ msleep(2); if (time_after(jiffies, timeout)) { - /* Timeout, marked the outstanding cmd not complete */ + /* Timeout, mark the outstanding cmd not complete */ + + /* Sanity check sli.mbox_active has not completed or + * cancelled from another context during last 2ms sleep, + * so take hbalock to be sure before logging. + */ + spin_lock_irq(&phba->hbalock); + if (phba->sli.mbox_active) { + mboxq = phba->sli.mbox_active; + cmd = mboxq->u.mb.mbxCommand; + subsys = lpfc_sli_config_mbox_subsys_get(phba, + mboxq); + opcode = lpfc_sli_config_mbox_opcode_get(phba, + mboxq); + sli_flag = psli->sli_flag; + spin_unlock_irq(&phba->hbalock); + lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, + "2352 Mailbox command x%x " + "(x%x/x%x) sli_flag x%x could " + "not complete\n", + cmd, subsys, opcode, + sli_flag); + } else { + spin_unlock_irq(&phba->hbalock); + } + rc = 1; break; } -- 2.30.2