Now that the lower half has much better per-cpu parallelization using the hardware queues, the SCSI MQ support needs to be tied into it. The involves the following mods: - Use the hardware queue info from the midlayer to help select the hardware queue to utilize. This required change to the get_scsi-buf_xxx routines. = Remove lpfc_sli4_scmd_to_wqidx_distr() routine. No longer needed. - Includes fix for SLI-3 that does not have multi queue parallelization. Signed-off-by: Dick Kennedy <dick.kennedy@xxxxxxxxxxxx> Signed-off-by: James Smart <jsmart2021@xxxxxxxxx> --- v2: Adapt for 5.0 api changes: SCSI mq support only. Remove all use of shost_use_scsi_mq(). Remove driver flag and module parameter to enable scsi_mq --- drivers/scsi/lpfc/lpfc.h | 3 +- drivers/scsi/lpfc/lpfc_init.c | 5 +-- drivers/scsi/lpfc/lpfc_scsi.c | 72 ++++++++++++------------------------------- drivers/scsi/lpfc/lpfc_scsi.h | 2 -- 4 files changed, 24 insertions(+), 58 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 9262c52e32d6..755bf49c272c 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -619,7 +619,8 @@ struct lpfc_ras_fwlog { struct lpfc_hba { /* SCSI interface function jump table entries */ struct lpfc_scsi_buf * (*lpfc_get_scsi_buf) - (struct lpfc_hba *, struct lpfc_nodelist *); + (struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, + struct scsi_cmnd *cmnd); int (*lpfc_scsi_prep_dma_buf) (struct lpfc_hba *, struct lpfc_scsi_buf *); void (*lpfc_scsi_unprep_dma_buf) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 36d9c32c9c87..e4040cdd76b7 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -4063,12 +4063,13 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) shost->max_lun = vport->cfg_max_luns; shost->this_id = -1; shost->max_cmd_len = 16; - shost->nr_hw_queues = phba->cfg_hdw_queue; if (phba->sli_rev == LPFC_SLI_REV4) { + shost->nr_hw_queues = phba->cfg_hdw_queue; shost->dma_boundary = phba->sli4_hba.pc_sli4_params.sge_supp_len-1; shost->sg_tablesize = phba->cfg_scsi_seg_cnt; - } + } else + shost->nr_hw_queues = 1; /* * Set initial can_queue value since 0 is no longer supported and diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 55c58bbfee08..79a3765bdd9b 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -636,7 +636,8 @@ lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *phba, * Pointer to lpfc_scsi_buf - Success **/ static struct lpfc_scsi_buf* -lpfc_get_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) +lpfc_get_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, + struct scsi_cmnd *cmnd) { struct lpfc_scsi_buf * lpfc_cmd = NULL; struct list_head *scsi_buf_list_get = &phba->lpfc_scsi_buf_list_get; @@ -674,7 +675,8 @@ lpfc_get_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) * Pointer to lpfc_scsi_buf - Success **/ static struct lpfc_scsi_buf* -lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) +lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, + struct scsi_cmnd *cmnd) { struct lpfc_scsi_buf *lpfc_cmd, *lpfc_cmd_next; struct lpfc_sli4_hdw_queue *qp; @@ -685,12 +687,18 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) dma_addr_t pdma_phys_fcp_cmd; uint32_t sgl_size, cpu, idx; int found = 0; + int tag; cpu = smp_processor_id(); - if (cpu < phba->cfg_hdw_queue) - idx = cpu; - else - idx = cpu % phba->cfg_hdw_queue; + if (cmnd) { + tag = blk_mq_unique_tag(cmnd->request); + idx = blk_mq_unique_tag_to_hwq(tag); + } else { + if (cpu < phba->cfg_hdw_queue) + idx = cpu; + else + idx = cpu % phba->cfg_hdw_queue; + } qp = &phba->sli4_hba.hdwq[idx]; spin_lock_irqsave(&qp->io_buf_list_get_lock, iflag); @@ -815,9 +823,10 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) * Pointer to lpfc_scsi_buf - Success **/ static struct lpfc_scsi_buf* -lpfc_get_scsi_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) +lpfc_get_scsi_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, + struct scsi_cmnd *cmnd) { - return phba->lpfc_get_scsi_buf(phba, ndlp); + return phba->lpfc_get_scsi_buf(phba, ndlp, cmnd); } /** @@ -3658,49 +3667,6 @@ lpfc_handle_fcp_err(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, } /** - * lpfc_sli4_scmd_to_wqidx_distr - scsi command to SLI4 WQ index distribution - * @phba: Pointer to HBA context object. - * - * This routine performs a roundrobin SCSI command to SLI4 FCP WQ index - * distribution. This is called by __lpfc_sli_issue_iocb_s4() with the hbalock - * held. - * If scsi-mq is enabled, get the default block layer mapping of software queues - * to hardware queues. This information is saved in request tag. - * - * Return: index into SLI4 fast-path FCP queue index. - **/ -int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, - struct lpfc_scsi_buf *lpfc_cmd) -{ - struct scsi_cmnd *cmnd = lpfc_cmd->pCmd; - struct lpfc_vector_map_info *cpup; - int chann, cpu; - uint32_t tag; - uint16_t hwq; - - if (cmnd) { - tag = blk_mq_unique_tag(cmnd->request); - hwq = blk_mq_unique_tag_to_hwq(tag); - - return hwq; - } - - if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_CPU && - phba->cfg_hdw_queue > 1) { - cpu = lpfc_cmd->cpu; - if (cpu < phba->sli4_hba.num_present_cpu) { - cpup = phba->sli4_hba.cpu_map; - cpup += cpu; - return cpup->channel_id; - } - } - chann = atomic_add_return(1, &phba->fcp_qidx); - chann = chann % phba->cfg_hdw_queue; - return chann; -} - - -/** * lpfc_scsi_cmd_iocb_cmpl - Scsi cmnd IOCB completion routine * @phba: The Hba for which this call is being executed. * @pIocbIn: The command IOCBQ for the scsi cmnd. @@ -4474,7 +4440,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) } } - lpfc_cmd = lpfc_get_scsi_buf(phba, ndlp); + lpfc_cmd = lpfc_get_scsi_buf(phba, ndlp, cmnd); if (lpfc_cmd == NULL) { lpfc_rampdown_queue_depth(phba); @@ -4913,7 +4879,7 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct scsi_cmnd *cmnd, return FAILED; pnode = rdata->pnode; - lpfc_cmd = lpfc_get_scsi_buf(phba, pnode); + lpfc_cmd = lpfc_get_scsi_buf(phba, pnode, NULL); if (lpfc_cmd == NULL) return FAILED; lpfc_cmd->timeout = phba->cfg_task_mgmt_tmo; diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h index 58cb6fd575c3..696898087244 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.h +++ b/drivers/scsi/lpfc/lpfc_scsi.h @@ -200,5 +200,3 @@ struct lpfc_scsi_buf { /* For sysfs/debugfs tmp string max len */ #define LPFC_MAX_SCSI_INFO_TMP_LEN 79 -int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, - struct lpfc_scsi_buf *lpfc_cmd); -- 2.13.7