From: Quinn Tran <qutran@xxxxxxxxxxx> For the mailbox thread that encounter PCIe error, pause that thread until PCIe link reset/recovery completed to prevent the thread from possibly unmapping any type of DMA resource that might be in progress at the same time. Signed-off-by: Quinn Tran <qutran@xxxxxxxxxxx> Signed-off-by: Nilesh Javali <njavali@xxxxxxxxxxx> Reviewed-by: Himanshu Madhani <himanshu.madhani@xxxxxxxxxx> Tested-by: Laurence Oberman <loberman@xxxxxxxxxx> --- drivers/scsi/qla2xxx/qla_mbx.c | 39 ++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 0149f84cdd8e..0bcd8afdc0ff 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -102,7 +102,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) int rval, i; unsigned long flags = 0; device_reg_t *reg; - uint8_t abort_active; + uint8_t abort_active, eeh_delay; uint8_t io_lock_on; uint16_t command = 0; uint16_t *iptr; @@ -136,7 +136,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) "PCI error, exiting.\n"); return QLA_FUNCTION_TIMEOUT; } - + eeh_delay = 0; reg = ha->iobase; io_lock_on = base_vha->flags.init_done; @@ -159,11 +159,10 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) } /* check if ISP abort is active and return cmd with timeout */ - if ((test_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags) || - test_bit(ISP_ABORT_RETRY, &base_vha->dpc_flags) || - test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags) || - ha->flags.eeh_busy) && - !is_rom_cmd(mcp->mb[0])) { + if (((test_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags) || + test_bit(ISP_ABORT_RETRY, &base_vha->dpc_flags) || + test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags)) && + !is_rom_cmd(mcp->mb[0])) || ha->flags.eeh_busy) { ql_log(ql_log_info, vha, 0x1005, "Cmd 0x%x aborted with timeout since ISP Abort is pending\n", mcp->mb[0]); @@ -186,7 +185,11 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) return QLA_FUNCTION_TIMEOUT; } atomic_dec(&ha->num_pend_mbx_stage1); - if (ha->flags.purge_mbox || chip_reset != ha->chip_reset) { + if (ha->flags.purge_mbox || chip_reset != ha->chip_reset || + ha->flags.eeh_busy) { + ql_log(ql_log_warn, vha, 0xd035, + "Error detected: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", + ha->flags.purge_mbox, ha->flags.eeh_busy, mcp->mb[0]); rval = QLA_ABORTED; goto premature_exit; } @@ -266,6 +269,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) if (!wait_for_completion_timeout(&ha->mbx_intr_comp, mcp->tov * HZ)) { if (chip_reset != ha->chip_reset) { + eeh_delay = ha->flags.eeh_busy ? 1 : 0; + spin_lock_irqsave(&ha->hardware_lock, flags); ha->flags.mbox_busy = 0; spin_unlock_irqrestore(&ha->hardware_lock, @@ -283,6 +288,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) } else if (ha->flags.purge_mbox || chip_reset != ha->chip_reset) { + eeh_delay = ha->flags.eeh_busy ? 1 : 0; + spin_lock_irqsave(&ha->hardware_lock, flags); ha->flags.mbox_busy = 0; spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -324,6 +331,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) while (!ha->flags.mbox_int) { if (ha->flags.purge_mbox || chip_reset != ha->chip_reset) { + eeh_delay = ha->flags.eeh_busy ? 1 : 0; + spin_lock_irqsave(&ha->hardware_lock, flags); ha->flags.mbox_busy = 0; spin_unlock_irqrestore(&ha->hardware_lock, @@ -532,7 +541,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) clear_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); /* Allow next mbx cmd to come in. */ complete(&ha->mbx_cmd_comp); - if (ha->isp_ops->abort_isp(vha)) { + if (ha->isp_ops->abort_isp(vha) && + !ha->flags.eeh_busy) { /* Failed. retry later. */ set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); @@ -585,6 +595,17 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) ql_dbg(ql_dbg_mbx, base_vha, 0x1021, "Done %s.\n", __func__); } + i = 500; + while (i && eeh_delay && (ha->pci_error_state < QLA_PCI_SLOT_RESET)) { + /* + * The caller of this mailbox encounter pci error. + * Hold the thread until PCIE link reset complete to make + * sure caller does not unmap dma while recovery is + * in progress. + */ + msleep(1); + i--; + } return rval; } -- 2.19.0.rc0