The PBDE feature, setting payload buffer address explicitly in the WQE so it doesn't have to be fetched from the SGL, only makes sense when there is a single buffer for the I/O. When there are multiple buffers it actually hurts performance as the SGL subsequently has to be fetched. Rework the SGL logic to only use PBDE when a single buffer. Co-developed-by: Justin Tee <justin.tee@xxxxxxxxxxxx> Signed-off-by: Justin Tee <justin.tee@xxxxxxxxxxxx> Signed-off-by: James Smart <jsmart2021@xxxxxxxxx> --- drivers/scsi/lpfc/lpfc_nvme.c | 12 +++++----- drivers/scsi/lpfc/lpfc_nvmet.c | 44 ++++++++++++++++------------------ drivers/scsi/lpfc/lpfc_scsi.c | 26 ++++++++++---------- 3 files changed, 40 insertions(+), 42 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 33266e1b24ab..69d3758dd8dc 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1299,7 +1299,6 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport, struct sli4_sge *first_data_sgl; struct ulp_bde64 *bde; dma_addr_t physaddr = 0; - uint32_t num_bde = 0; uint32_t dma_len = 0; uint32_t dma_offset = 0; int nseg, i, j; @@ -1353,7 +1352,7 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport, } sgl->word2 = 0; - if ((num_bde + 1) == nseg) { + if (nseg == 1) { bf_set(lpfc_sli4_sge_last, sgl, 1); bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA); @@ -1422,8 +1421,9 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport, j++; } - if (phba->cfg_enable_pbde) { - /* Use PBDE support for first SGL only, offset == 0 */ + + /* PBDE support for first data SGE only */ + if (nseg == 1 && phba->cfg_enable_pbde) { /* Words 13-15 */ bde = (struct ulp_bde64 *) &wqe->words[13]; @@ -1434,11 +1434,11 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport, bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64; bde->tus.w = cpu_to_le32(bde->tus.w); - /* Word 11 */ + /* Word 11 - set PBDE bit */ bf_set(wqe_pbde, &wqe->generic.wqe_com, 1); } else { memset(&wqe->words[13], 0, (sizeof(uint32_t) * 3)); - bf_set(wqe_pbde, &wqe->generic.wqe_com, 0); + /* Word 11 - PBDE bit disabled by default template */ } } else { diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 6e3dd0b9bcfa..731802527b81 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -2708,7 +2708,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, struct ulp_bde64 *bde; dma_addr_t physaddr; int i, cnt, nsegs; - int do_pbde; + bool use_pbde = false; int xc = 1; if (!lpfc_is_link_up(phba)) { @@ -2816,9 +2816,6 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, if (!xc) bf_set(wqe_xc, &wqe->fcp_tsend.wqe_com, 0); - /* Word 11 - set sup, irsp, irsplen later */ - do_pbde = 0; - /* Word 12 */ wqe->fcp_tsend.fcp_data_len = rsp->transfer_length; @@ -2896,12 +2893,13 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, if (!xc) bf_set(wqe_xc, &wqe->fcp_treceive.wqe_com, 0); - /* Word 11 - set pbde later */ - if (phba->cfg_enable_pbde) { - do_pbde = 1; + /* Word 11 - check for pbde */ + if (nsegs == 1 && phba->cfg_enable_pbde) { + use_pbde = true; + /* Word 11 - PBDE bit already preset by template */ } else { + /* Overwrite default template setting */ bf_set(wqe_pbde, &wqe->fcp_treceive.wqe_com, 0); - do_pbde = 0; } /* Word 12 */ @@ -2972,7 +2970,6 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, ((rsp->rsplen >> 2) - 1)); memcpy(&wqe->words[16], rsp->rspaddr, rsp->rsplen); } - do_pbde = 0; /* Word 12 */ wqe->fcp_trsp.rsvd_12_15[0] = 0; @@ -3007,23 +3004,24 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, bf_set(lpfc_sli4_sge_last, sgl, 1); sgl->word2 = cpu_to_le32(sgl->word2); sgl->sge_len = cpu_to_le32(cnt); - if (i == 0) { - bde = (struct ulp_bde64 *)&wqe->words[13]; - if (do_pbde) { - /* Words 13-15 (PBDE) */ - bde->addrLow = sgl->addr_lo; - bde->addrHigh = sgl->addr_hi; - bde->tus.f.bdeSize = - le32_to_cpu(sgl->sge_len); - bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64; - bde->tus.w = cpu_to_le32(bde->tus.w); - } else { - memset(bde, 0, sizeof(struct ulp_bde64)); - } - } sgl++; ctxp->offset += cnt; } + + bde = (struct ulp_bde64 *)&wqe->words[13]; + if (use_pbde) { + /* decrement sgl ptr backwards once to first data sge */ + sgl--; + + /* Words 13-15 (PBDE) */ + bde->addrLow = sgl->addr_lo; + bde->addrHigh = sgl->addr_hi; + bde->tus.f.bdeSize = le32_to_cpu(sgl->sge_len); + bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64; + bde->tus.w = cpu_to_le32(bde->tus.w); + } else { + memset(bde, 0, sizeof(struct ulp_bde64)); + } ctxp->state = LPFC_NVME_STE_DATA; ctxp->entry_cnt++; return nvmewqe; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index a2cd22728b0f..078fbea3f436 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3235,7 +3235,6 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd) struct lpfc_vport *vport = phba->pport; union lpfc_wqe128 *wqe = &pwqeq->wqe; dma_addr_t physaddr; - uint32_t num_bde = 0; uint32_t dma_len; uint32_t dma_offset = 0; int nseg, i, j; @@ -3297,7 +3296,7 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd) j = 2; for (i = 0; i < nseg; i++) { sgl->word2 = 0; - if ((num_bde + 1) == nseg) { + if (nseg == 1) { bf_set(lpfc_sli4_sge_last, sgl, 1); bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA); @@ -3366,13 +3365,15 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd) j++; } - /* - * Setup the first Payload BDE. For FCoE we just key off - * Performance Hints, for FC we use lpfc_enable_pbde. - * We populate words 13-15 of IOCB/WQE. + + /* PBDE support for first data SGE only. + * For FCoE, we key off Performance Hints. + * For FC, we key off lpfc_enable_pbde. */ - if ((phba->sli3_options & LPFC_SLI4_PERFH_ENABLED) || - phba->cfg_enable_pbde) { + if (nseg == 1 && + ((phba->sli3_options & LPFC_SLI4_PERFH_ENABLED) || + phba->cfg_enable_pbde)) { + /* Words 13-15 */ bde = (struct ulp_bde64 *) &wqe->words[13]; bde->addrLow = first_data_sgl->addr_lo; @@ -3382,12 +3383,15 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd) bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64; bde->tus.w = cpu_to_le32(bde->tus.w); + /* Word 11 - set PBDE bit */ + bf_set(wqe_pbde, &wqe->generic.wqe_com, 1); } else { memset(&wqe->words[13], 0, (sizeof(uint32_t) * 3)); + /* Word 11 - PBDE bit disabled by default template */ } } else { sgl += 1; - /* clear the last flag in the fcp_rsp map entry */ + /* set the last flag in the fcp_rsp map entry */ sgl->word2 = le32_to_cpu(sgl->word2); bf_set(lpfc_sli4_sge_last, sgl, 1); sgl->word2 = cpu_to_le32(sgl->word2); @@ -3400,10 +3404,6 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd) } } - /* Word 11 */ - if (phba->cfg_enable_pbde) - bf_set(wqe_pbde, &wqe->generic.wqe_com, 1); - /* * Finish initializing those IOCB fields that are dependent on the * scsi_cmnd request_buffer. Note that for SLI-2 the bdeSize is -- 2.26.2