> On Mar 22, 2021, at 11:42 PM, Nilesh Javali <njavali@xxxxxxxxxxx> wrote: > > From: Quinn Tran <qutran@xxxxxxxxxxx> > > For the mailbox thread that encounter PCIe error, pause that > thread until PCIe link reset/recovery completed to prevent > the thread from possibly unmapping any type of DMA resource that might > be in progress at the same time. > > Signed-off-by: Quinn Tran <qutran@xxxxxxxxxxx> > Signed-off-by: Nilesh Javali <njavali@xxxxxxxxxxx> > --- > drivers/scsi/qla2xxx/qla_mbx.c | 38 ++++++++++++++++++++++++++-------- > 1 file changed, 29 insertions(+), 9 deletions(-) > > diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c > index 0149f84cdd8e..3bc6020cfb8d 100644 > --- a/drivers/scsi/qla2xxx/qla_mbx.c > +++ b/drivers/scsi/qla2xxx/qla_mbx.c > @@ -102,7 +102,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > int rval, i; > unsigned long flags = 0; > device_reg_t *reg; > - uint8_t abort_active; > + uint8_t abort_active, eeh_delay; > uint8_t io_lock_on; > uint16_t command = 0; > uint16_t *iptr; > @@ -136,7 +136,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > "PCI error, exiting.\n"); > return QLA_FUNCTION_TIMEOUT; > } > - > + eeh_delay = 0; > reg = ha->iobase; > io_lock_on = base_vha->flags.init_done; > > @@ -159,11 +159,10 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > } > > /* check if ISP abort is active and return cmd with timeout */ > - if ((test_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags) || > - test_bit(ISP_ABORT_RETRY, &base_vha->dpc_flags) || > - test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags) || > - ha->flags.eeh_busy) && > - !is_rom_cmd(mcp->mb[0])) { > + if (((test_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags) || > + test_bit(ISP_ABORT_RETRY, &base_vha->dpc_flags) || > + test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags)) && > + !is_rom_cmd(mcp->mb[0])) || ha->flags.eeh_busy) { > ql_log(ql_log_info, vha, 0x1005, > "Cmd 0x%x aborted with timeout since ISP Abort is 
pending\n", > mcp->mb[0]); > @@ -186,7 +185,11 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > return QLA_FUNCTION_TIMEOUT; > } > atomic_dec(&ha->num_pend_mbx_stage1); > - if (ha->flags.purge_mbox || chip_reset != ha->chip_reset) { > + if (ha->flags.purge_mbox || chip_reset != ha->chip_reset || > + ha->flags.eeh_busy) { > + ql_log(ql_log_warn, vha, 0xd035, > + "Error detected: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", > + ha->flags.purge_mbox, ha->flags.eeh_busy, mcp->mb[0]); > rval = QLA_ABORTED; > goto premature_exit; > } > @@ -266,6 +269,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > if (!wait_for_completion_timeout(&ha->mbx_intr_comp, > mcp->tov * HZ)) { > if (chip_reset != ha->chip_reset) { > + eeh_delay = ha->flags.eeh_busy ? 1 : 0; > + > spin_lock_irqsave(&ha->hardware_lock, flags); > ha->flags.mbox_busy = 0; > spin_unlock_irqrestore(&ha->hardware_lock, > @@ -283,6 +288,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > > } else if (ha->flags.purge_mbox || > chip_reset != ha->chip_reset) { > + eeh_delay = ha->flags.eeh_busy ? 1 : 0; > + > spin_lock_irqsave(&ha->hardware_lock, flags); > ha->flags.mbox_busy = 0; > spin_unlock_irqrestore(&ha->hardware_lock, flags); > @@ -324,6 +331,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > while (!ha->flags.mbox_int) { > if (ha->flags.purge_mbox || > chip_reset != ha->chip_reset) { > + eeh_delay = ha->flags.eeh_busy ? 1 : 0; > + > spin_lock_irqsave(&ha->hardware_lock, flags); > ha->flags.mbox_busy = 0; > spin_unlock_irqrestore(&ha->hardware_lock, > @@ -532,7 +541,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > clear_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); > /* Allow next mbx cmd to come in. */ > complete(&ha->mbx_cmd_comp); > - if (ha->isp_ops->abort_isp(vha)) { > + if (ha->isp_ops->abort_isp(vha) && > + !ha->flags.eeh_busy) { > /* Failed. retry later. 
*/ > set_bit(ISP_ABORT_NEEDED, > &vha->dpc_flags); > @@ -585,6 +595,16 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > ql_dbg(ql_dbg_mbx, base_vha, 0x1021, "Done %s.\n", __func__); > } > > + i = 500; > + while (i && eeh_delay && (ha->pci_error_state < QLA_PCI_SLOT_RESET)) { > + /* The caller of this mailbox encounter pci error. > + * Hold the thread until PCIE link reset complete to make > + * sure caller does not unmap dma while recovery is > + * in progress. > + */ Small nit: please fix the formatting of this multi-line comment. > + msleep(1); > + i--; > + } > return rval; > } > > -- > 2.19.0.rc0 > The code itself looks good. After fixing the comment, you can add Reviewed-by: Himanshu Madhani <himanshu.madhani@xxxxxxxxxx> -- Himanshu Madhani Oracle Linux Engineering