From: Manish Rangankar <mrangankar@xxxxxxxxxxx> DPC thread gets restricted due to a no-op mailbox, which is a blocking call and has a high execution frequency. To free up the DPC thread we move no-op handling to the workqueue. Also, modified qla_do_hb to send no-op MBC if we don’t have any active interrupts, but there are still IOs outstanding with firmware. Fixes: d94d8158e184 ("scsi: qla2xxx: Add heartbeat check") Signed-off-by: Manish Rangankar <mrangankar@xxxxxxxxxxx> Signed-off-by: Nilesh Javali <njavali@xxxxxxxxxxx> --- drivers/scsi/qla2xxx/qla_def.h | 4 +- drivers/scsi/qla2xxx/qla_init.c | 2 + drivers/scsi/qla2xxx/qla_os.c | 74 +++++++++++++++------------------ 3 files changed, 38 insertions(+), 42 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index be2eb75ee1a3..d6e131bf1824 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3750,6 +3750,7 @@ struct qla_qpair { struct qla_fw_resources fwres ____cacheline_aligned; u32 cmd_cnt; u32 cmd_completion_cnt; + u32 prev_completion_cnt; }; /* Place holder for FW buffer parameters */ @@ -4607,6 +4608,7 @@ struct qla_hw_data { struct qla_chip_state_84xx *cs84xx; struct isp_operations *isp_ops; struct workqueue_struct *wq; + struct work_struct hb_work; struct qlfc_fw fw_buf; /* FCP_CMND priority support */ @@ -4708,7 +4710,6 @@ struct qla_hw_data { struct qla_hw_data_stat stat; pci_error_state_t pci_error_state; - u64 prev_cmd_cnt; struct dma_pool *purex_dma_pool; struct btree_head32 host_map; @@ -4854,7 +4855,6 @@ typedef struct scsi_qla_host { #define SET_ZIO_THRESHOLD_NEEDED 32 #define ISP_ABORT_TO_ROM 33 #define VPORT_DELETE 34 -#define HEARTBEAT_CHK 38 #define PROCESS_PUREX_IOCB 63 diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index c6b3d0e7489e..a9a4243cb15a 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -7025,12 +7025,14 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) ha->chip_reset++; ha->base_qpair->chip_reset = ha->chip_reset; ha->base_qpair->cmd_cnt = ha->base_qpair->cmd_completion_cnt = 0; + ha->base_qpair->prev_completion_cnt = 0; for (i = 0; i < ha->max_qpairs; i++) { if (ha->queue_pair_map[i]) { ha->queue_pair_map[i]->chip_reset = ha->base_qpair->chip_reset; ha->queue_pair_map[i]->cmd_cnt = ha->queue_pair_map[i]->cmd_completion_cnt = 0; + ha->base_qpair->prev_completion_cnt = 0; } } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index a1e861ecfc01..0454f79a8047 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2794,6 +2794,16 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time) return atomic_read(&vha->loop_state) == LOOP_READY; } +static void qla_hb_work_fn(struct work_struct *work) +{ + struct qla_hw_data *ha = container_of(work, + struct qla_hw_data, hb_work); + struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); + + if (!ha->flags.mbox_busy && base_vha->flags.init_done) + qla_no_op_mb(base_vha); +} + static void qla2x00_iocb_work_fn(struct work_struct *work) { struct scsi_qla_host *vha = container_of(work, @@ -3232,6 +3242,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) host->transportt, sht->vendor_id); INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); + INIT_WORK(&ha->hb_work, qla_hb_work_fn); /* Set up the irqs */ ret = qla2x00_request_irqs(ha, rsp); @@ -7118,17 +7129,6 @@ qla2x00_do_dpc(void *data) qla2x00_lip_reset(base_vha); } - if (test_bit(HEARTBEAT_CHK, &base_vha->dpc_flags)) { - /* - * if there is a mb in progress then that's - * enough of a check to see if fw is still ticking. - */ - if (!ha->flags.mbox_busy && base_vha->flags.init_done) - qla_no_op_mb(base_vha); - - clear_bit(HEARTBEAT_CHK, &base_vha->dpc_flags); - } - ha->dpc_active = 0; end_loop: set_current_state(TASK_INTERRUPTIBLE); @@ -7187,57 +7187,51 @@ qla2x00_rst_aen(scsi_qla_host_t *vha) static bool qla_do_heartbeat(struct scsi_qla_host *vha) { - u64 cmd_cnt, prev_cmd_cnt; - bool do_hb = false; struct qla_hw_data *ha = vha->hw; - int i; + u32 cmpl_cnt; + u16 i; + bool do_hb = false; - /* if cmds are still pending down in fw, then do hb */ - if (ha->base_qpair->cmd_cnt != ha->base_qpair->cmd_completion_cnt) { + /* + * Allow do_hb only if we don’t have any active interrupts, + * but there are still IOs outstanding with firmware. + */ + cmpl_cnt = ha->base_qpair->cmd_completion_cnt; + if (cmpl_cnt == ha->base_qpair->prev_completion_cnt && + cmpl_cnt != ha->base_qpair->cmd_cnt) { do_hb = true; goto skip; } + ha->base_qpair->prev_completion_cnt = cmpl_cnt; for (i = 0; i < ha->max_qpairs; i++) { - if (ha->queue_pair_map[i] && - ha->queue_pair_map[i]->cmd_cnt != - ha->queue_pair_map[i]->cmd_completion_cnt) { - do_hb = true; - break; + if (ha->queue_pair_map[i]) { + cmpl_cnt = ha->queue_pair_map[i]->cmd_completion_cnt; + if (cmpl_cnt == ha->queue_pair_map[i]->prev_completion_cnt && + cmpl_cnt != ha->queue_pair_map[i]->cmd_cnt) { + do_hb = true; + break; + } + ha->queue_pair_map[i]->prev_completion_cnt = cmpl_cnt; } } skip: - prev_cmd_cnt = ha->prev_cmd_cnt; - cmd_cnt = ha->base_qpair->cmd_cnt; - for (i = 0; i < ha->max_qpairs; i++) { - if (ha->queue_pair_map[i]) - cmd_cnt += ha->queue_pair_map[i]->cmd_cnt; - } - ha->prev_cmd_cnt = cmd_cnt; - - if (!do_hb && ((cmd_cnt - prev_cmd_cnt) > 50)) - /* - * IOs are completing before periodic hb check. - * IOs seems to be running, do hb for sanity check. - */ - do_hb = true; - return do_hb; } static void qla_heart_beat(struct scsi_qla_host *vha) { + struct qla_hw_data *ha = vha->hw; + if (vha->vp_idx) return; if (vha->hw->flags.eeh_busy || qla2x00_chip_is_down(vha)) return; - if (qla_do_heartbeat(vha)) { - set_bit(HEARTBEAT_CHK, &vha->dpc_flags); - qla2xxx_wake_dpc(vha); - } + if (qla_do_heartbeat(vha)) + queue_work(ha->wq, &ha->hb_work); } /************************************************************************** -- 2.19.0.rc0