From: Quinn Tran <qutran@xxxxxxxxxxx>
Use 'no-op' mailbox command to check and see if FW is still responsive.
Signed-off-by: Quinn Tran <qutran@xxxxxxxxxxx>
Signed-off-by: Nilesh Javali <njavali@xxxxxxxxxxx>
---
drivers/scsi/qla2xxx/qla_def.h | 4 ++
drivers/scsi/qla2xxx/qla_gbl.h | 1 +
drivers/scsi/qla2xxx/qla_init.c | 6 ++-
drivers/scsi/qla2xxx/qla_iocb.c | 4 ++
drivers/scsi/qla2xxx/qla_isr.c | 4 ++
drivers/scsi/qla2xxx/qla_mbx.c | 27 +++++++++++++
drivers/scsi/qla2xxx/qla_nvme.c | 4 ++
drivers/scsi/qla2xxx/qla_os.c | 68 +++++++++++++++++++++++++++++++++
8 files changed, 117 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index def4d99f80e9..2f67ec1df3e6 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -3660,6 +3660,8 @@ struct qla_qpair {
struct qla_tgt_counters tgt_counters;
uint16_t cpuid;
struct qla_fw_resources fwres ____cacheline_aligned;
+ u32 cmd_cnt;
+ u32 cmd_completion_cnt;
};
/* Place holder for FW buffer parameters */
@@ -4616,6 +4618,7 @@ struct qla_hw_data {
struct qla_hw_data_stat stat;
pci_error_state_t pci_error_state;
+ u64 prev_cmd_cnt;
};
struct active_regions {
@@ -4743,6 +4746,7 @@ typedef struct scsi_qla_host {
#define SET_ZIO_THRESHOLD_NEEDED 32
#define ISP_ABORT_TO_ROM 33
#define VPORT_DELETE 34
+#define HEARTBEAT_CHK 38
#define PROCESS_PUREX_IOCB 63
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index fae5cae6f0a8..70b7cda0a25a 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -551,6 +551,7 @@ extern int qla2xxx_read_remote_register(scsi_qla_host_t *, uint32_t,
uint32_t *);
extern int qla2xxx_write_remote_register(scsi_qla_host_t *, uint32_t,
uint32_t);
+void qla_no_op_mb(struct scsi_qla_host *vha);
/*
* Global Function Prototypes in qla_isr.c source file.
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index eb825318e3f5..f8f471157109 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -6870,10 +6870,14 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
ha->flags.fw_init_done = 0;
ha->chip_reset++;
ha->base_qpair->chip_reset = ha->chip_reset;
+ ha->base_qpair->cmd_cnt = ha->base_qpair->cmd_completion_cnt = 0;
for (i = 0; i < ha->max_qpairs; i++) {
- if (ha->queue_pair_map[i])
+ if (ha->queue_pair_map[i]) {
ha->queue_pair_map[i]->chip_reset =
ha->base_qpair->chip_reset;
+ ha->queue_pair_map[i]->cmd_cnt =
+ ha->queue_pair_map[i]->cmd_completion_cnt = 0;
+ }
}
/* purge MBox commands */
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 38b5bdde2405..d0ee843f6b04 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -1710,6 +1710,7 @@ qla24xx_start_scsi(srb_t *sp)
} else
req->ring_ptr++;
+ sp->qpair->cmd_cnt++;
sp->flags |= SRB_DMA_VALID;
/* Set chip new ring index. */
@@ -1912,6 +1913,7 @@ qla24xx_dif_start_scsi(srb_t *sp)
} else
req->ring_ptr++;
+ sp->qpair->cmd_cnt++;
/* Set chip new ring index. */
wrt_reg_dword(req->req_q_in, req->ring_index);
@@ -2068,6 +2070,7 @@ qla2xxx_start_scsi_mq(srb_t *sp)
} else
req->ring_ptr++;
+ sp->qpair->cmd_cnt++;
sp->flags |= SRB_DMA_VALID;
/* Set chip new ring index. */
@@ -2284,6 +2287,7 @@ qla2xxx_dif_start_scsi_mq(srb_t *sp)
} else
req->ring_ptr++;
+ sp->qpair->cmd_cnt++;
/* Set chip new ring index. */
wrt_reg_dword(req->req_q_in, req->ring_index);
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 6e8f737a4af3..8a8e355f4a89 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -2322,6 +2322,8 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
if (unlikely(iocb->u.nvme.aen_op))
atomic_dec(&sp->vha->hw->nvme_active_aen_cnt);
+ else
+ sp->qpair->cmd_completion_cnt++;
if (unlikely(comp_status != CS_COMPLETE))
logit = 1;
@@ -2976,6 +2978,8 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
return;
}
+ sp->qpair->cmd_completion_cnt++;
+
/* Fast path completion. */
if (comp_status == CS_COMPLETE && scsi_status == 0) {
qla2x00_process_completed_request(vha, req, handle);
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 0bcd8afdc0ff..9f3ad8aa649c 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -6939,3 +6939,30 @@ ql26xx_led_config(scsi_qla_host_t *vha, uint16_t options, uint16_t *led)
return rval;
}
+
+/**
+ * qla_no_op_mb(): This MB is used to check if FW is still alive and
+ * able to generate an interrupt. Otherwise, a timeout will trigger
+ * FW dump + reset
+ * @vha: host adapter pointer
+ * Return: None
+ */
+void qla_no_op_mb(struct scsi_qla_host *vha)
+{
+ mbx_cmd_t mc;
+ mbx_cmd_t *mcp = &mc;
+ int rval;
+
+ memset(&mc, 0, sizeof(mc));
+ mcp->mb[0] = 0; // noop cmd= 0
+ mcp->out_mb = MBX_0;
+ mcp->in_mb = MBX_0;
+ mcp->tov = 5;
+ mcp->flags = 0;
+ rval = qla2x00_mailbox_command(vha, mcp);
+
+ if (rval) {
+ ql_dbg(ql_dbg_async, vha, 0x7071,
+ "Failed %s %x\n", __func__, rval);
+ }
+}
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
index e119f8b24e33..3e5c70a1d969 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.c
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
@@ -536,6 +536,10 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp)
req->ring_ptr++;
}
+ /* ignore nvme async cmd due to long timeout */
+ if (!nvme->u.nvme.aen_op)
+ sp->qpair->cmd_cnt++;
+
/* Set chip new ring index. */
wrt_reg_dword(req->req_q_in, req->ring_index);
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 4eab564ea6a0..aa8581e07156 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -6969,6 +6969,17 @@ qla2x00_do_dpc(void *data)
qla2x00_lip_reset(base_vha);
}
+ if (test_bit(HEARTBEAT_CHK, &base_vha->dpc_flags)) {
+ /*
+ * if there is a mb in progress then that's
+ * enough of a check to see if fw is still ticking.
+ */
+ if (!ha->flags.mbox_busy && base_vha->flags.init_done)
+ qla_no_op_mb(base_vha);
+
+ clear_bit(HEARTBEAT_CHK, &base_vha->dpc_flags);
+ }
+
ha->dpc_active = 0;
end_loop:
set_current_state(TASK_INTERRUPTIBLE);
@@ -7025,6 +7036,61 @@ qla2x00_rst_aen(scsi_qla_host_t *vha)
}
}
+static bool qla_do_hb(struct scsi_qla_host *vha)
+{