New if_type=6 adapters support an additional BAR that provides apertures to allow direct WQE to adapter push support - termed Direct Packet Push (DPP). WQ creation differs slightly to ask for a WQ to be DPP-ized. When submitting a WQE to a DPP WQ, it is submitted to the host memory for the WQ normally, but is also written by the host cpu directly to a BAR aperture. Write buffer coalescing in hardware is (hopefully) turned on, enabling single pci write operation support. The doorbell is thing rung to indicate the WQE is available and was pushed to the aperture. This patch: - Updates the WQ Create commands for the DPP options - Adds the bar mapping for if_type=6 DPP bar - Adds the WQE pushing to the DDP aperture received from WQ create - Adds a new module parameter to disable DPP operation if desired. Default is enabled. Signed-off-by: Dick Kennedy <dick.kennedy@xxxxxxxxxxxx> Signed-off-by: James Smart <james.smart@xxxxxxxxxxxx> --- v3: remove unnecessary parens use ioremap_wc() instead of set_memory_wc(). the wc property is now set by default on the BAR. if direct push is disabled, the BAR won't be used so it won't matter what is set on it. Track cases where the ioremap_wc() may not succeed, leaving bar pointer NULL. In this case, disable direct push. As some platforms will honor ioremap_wc() but not truly enable wc, change default for direct push so enabled only on X86. v4: revert to original patch version using set_memory_wc() as benchmarking and different platform testing says its the best option revert to v1 using set_memory_wc() --- drivers/scsi/lpfc/lpfc.h | 3 +- drivers/scsi/lpfc/lpfc_attr.c | 10 ++ drivers/scsi/lpfc/lpfc_hw4.h | 31 ++++++ drivers/scsi/lpfc/lpfc_init.c | 18 ++++ drivers/scsi/lpfc/lpfc_sli.c | 234 +++++++++++++++++++++++++++--------------- drivers/scsi/lpfc/lpfc_sli4.h | 16 ++- 6 files changed, 225 insertions(+), 87 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 9698b9635058..86ffb9756e65 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -840,7 +840,8 @@ struct lpfc_hba { uint32_t cfg_enable_SmartSAN; uint32_t cfg_enable_mds_diags; uint32_t cfg_enable_fc4_type; - uint32_t cfg_enable_bbcr; /*Enable BB Credit Recovery*/ + uint32_t cfg_enable_bbcr; /* Enable BB Credit Recovery */ + uint32_t cfg_enable_dpp; /* Enable Direct Packet Push */ uint32_t cfg_xri_split; #define LPFC_ENABLE_FCP 1 #define LPFC_ENABLE_NVME 2 diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 7be4bdef4d42..32f17e19e123 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -5186,6 +5186,14 @@ LPFC_ATTR_R(enable_mds_diags, 0, 0, 1, "Enable MDS Diagnostics"); */ LPFC_BBCR_ATTR_RW(enable_bbcr, 1, 0, 1, "Enable BBC Recovery"); +/* + * lpfc_enable_dpp: Enable DPP on G7 + * 0 = DPP on G7 disabled + * 1 = DPP on G7 enabled (default) + * Value range is [0,1]. Default value is 1. + */ +LPFC_ATTR_RW(enable_dpp, 1, 0, 1, "Enable Direct Packet Push"); + struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_nvme_info, &dev_attr_bg_info, @@ -5294,6 +5302,7 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_xlane_supported, &dev_attr_lpfc_enable_mds_diags, &dev_attr_lpfc_enable_bbcr, + &dev_attr_lpfc_enable_dpp, NULL, }; @@ -6306,6 +6315,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_fcp_io_channel_init(phba, lpfc_fcp_io_channel); lpfc_nvme_io_channel_init(phba, lpfc_nvme_io_channel); lpfc_enable_bbcr_init(phba, lpfc_enable_bbcr); + lpfc_enable_dpp_init(phba, lpfc_enable_dpp); if (phba->sli_rev != LPFC_SLI_REV4) { /* NVME only supported on SLI4 */ diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 93fd9fd10a0f..60ccff6fa8b0 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -1372,6 +1372,15 @@ struct lpfc_mbx_wq_create { #define lpfc_mbx_wq_create_page_size_MASK 0x000000FF #define lpfc_mbx_wq_create_page_size_WORD word1 #define LPFC_WQ_PAGE_SIZE_4096 0x1 +#define lpfc_mbx_wq_create_dpp_req_SHIFT 15 +#define lpfc_mbx_wq_create_dpp_req_MASK 0x00000001 +#define lpfc_mbx_wq_create_dpp_req_WORD word1 +#define lpfc_mbx_wq_create_doe_SHIFT 14 +#define lpfc_mbx_wq_create_doe_MASK 0x00000001 +#define lpfc_mbx_wq_create_doe_WORD word1 +#define lpfc_mbx_wq_create_toe_SHIFT 13 +#define lpfc_mbx_wq_create_toe_MASK 0x00000001 +#define lpfc_mbx_wq_create_toe_WORD word1 #define lpfc_mbx_wq_create_wqe_size_SHIFT 8 #define lpfc_mbx_wq_create_wqe_size_MASK 0x0000000F #define lpfc_mbx_wq_create_wqe_size_WORD word1 @@ -1400,6 +1409,28 @@ struct lpfc_mbx_wq_create { #define lpfc_mbx_wq_create_db_format_MASK 0x0000FFFF #define lpfc_mbx_wq_create_db_format_WORD word2 } response; + struct { + uint32_t word0; +#define lpfc_mbx_wq_create_dpp_rsp_SHIFT 31 +#define lpfc_mbx_wq_create_dpp_rsp_MASK 0x00000001 +#define lpfc_mbx_wq_create_dpp_rsp_WORD word0 +#define lpfc_mbx_wq_create_v1_q_id_SHIFT 0 +#define lpfc_mbx_wq_create_v1_q_id_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_v1_q_id_WORD word0 + uint32_t word1; +#define lpfc_mbx_wq_create_v1_bar_set_SHIFT 0 +#define lpfc_mbx_wq_create_v1_bar_set_MASK 0x0000000F +#define lpfc_mbx_wq_create_v1_bar_set_WORD word1 + uint32_t doorbell_offset; + uint32_t word3; +#define lpfc_mbx_wq_create_dpp_id_SHIFT 16 +#define lpfc_mbx_wq_create_dpp_id_MASK 0x0000001F +#define lpfc_mbx_wq_create_dpp_id_WORD word3 +#define lpfc_mbx_wq_create_dpp_bar_SHIFT 0 +#define lpfc_mbx_wq_create_dpp_bar_MASK 0x0000000F +#define lpfc_mbx_wq_create_dpp_bar_WORD word3 + uint32_t dpp_offset; + } response_1; } u; }; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 0d51ecb7317f..5f0c63bb0a4d 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -9611,6 +9611,24 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba) } } + if (if_type == LPFC_SLI_INTF_IF_TYPE_6 && + pci_resource_start(pdev, PCI_64BIT_BAR4)) { + /* + * Map SLI4 if type 6 HBA DPP Register base to a kernel + * virtual address and setup the registers. + */ + phba->pci_bar2_map = pci_resource_start(pdev, PCI_64BIT_BAR4); + bar2map_len = pci_resource_len(pdev, PCI_64BIT_BAR4); + phba->sli4_hba.dpp_regs_memmap_p = + ioremap(phba->pci_bar2_map, bar2map_len); + if (!phba->sli4_hba.dpp_regs_memmap_p) { + dev_err(&pdev->dev, + "ioremap failed for SLI4 HBA dpp registers.\n"); + goto out_iounmap_ctrl; + } + phba->pci_bar4_memmap_p = phba->sli4_hba.dpp_regs_memmap_p; + } + /* Set up the EQ/CQ register handeling functions now */ switch (if_type) { case LPFC_SLI_INTF_IF_TYPE_0: diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 3bff1f9c5df7..4c316829923c 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -35,6 +35,9 @@ #include <scsi/scsi_transport_fc.h> #include <scsi/fc/fc_fs.h> #include <linux/aer.h> +#ifdef CONFIG_X86 +#include <asm/set_memory.h> +#endif #include <linux/nvme-fc-driver.h> @@ -112,6 +115,8 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe) struct lpfc_register doorbell; uint32_t host_index; uint32_t idx; + uint32_t i = 0; + uint8_t *tmp; /* sanity check on queue memory */ if (unlikely(!q)) @@ -133,7 +138,18 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe) if (q->phba->sli3_options & LPFC_SLI4_PHWQ_ENABLED) bf_set(wqe_wqid, &wqe->generic.wqe_com, q->queue_id); lpfc_sli_pcimem_bcopy(wqe, temp_wqe, q->entry_size); - /* ensure WQE bcopy flushed before doorbell write */ + if (q->dpp_enable && q->phba->cfg_enable_dpp) { + /* write to DPP aperture taking advatage of Combined Writes */ + tmp = (uint8_t *)wqe; +#ifdef CONFIG_64BIT + for (i = 0; i < q->entry_size; i += sizeof(uint64_t)) + writeq(*((uint64_t *)(tmp + i)), q->dpp_regaddr + i); +#else + for (i = 0; i < q->entry_size; i += sizeof(uint32_t)) + writel(*((uint32_t *)(tmp + i)), q->dpp_regaddr + i); +#endif + } + /* ensure WQE bcopy and DPP flushed before doorbell write */ wmb(); /* Update the host index before invoking device */ @@ -144,9 +160,18 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe) /* Ring Doorbell */ doorbell.word0 = 0; if (q->db_format == LPFC_DB_LIST_FORMAT) { - bf_set(lpfc_wq_db_list_fm_num_posted, &doorbell, 1); - bf_set(lpfc_wq_db_list_fm_index, &doorbell, host_index); - bf_set(lpfc_wq_db_list_fm_id, &doorbell, q->queue_id); + if (q->dpp_enable && q->phba->cfg_enable_dpp) { + bf_set(lpfc_if6_wq_db_list_fm_num_posted, &doorbell, 1); + bf_set(lpfc_if6_wq_db_list_fm_dpp, &doorbell, 1); + bf_set(lpfc_if6_wq_db_list_fm_dpp_id, &doorbell, + q->dpp_id); + bf_set(lpfc_if6_wq_db_list_fm_id, &doorbell, + q->queue_id); + } else { + bf_set(lpfc_wq_db_list_fm_num_posted, &doorbell, 1); + bf_set(lpfc_wq_db_list_fm_index, &doorbell, host_index); + bf_set(lpfc_wq_db_list_fm_id, &doorbell, q->queue_id); + } } else if (q->db_format == LPFC_DB_RING_FORMAT) { bf_set(lpfc_wq_db_ring_fm_num_posted, &doorbell, 1); bf_set(lpfc_wq_db_ring_fm_id, &doorbell, q->queue_id); @@ -15023,6 +15048,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, void __iomem *bar_memmap_p; uint32_t db_offset; uint16_t pci_barset; + uint8_t dpp_barset; + uint32_t dpp_offset; + unsigned long pg_addr; uint8_t wq_create_version; /* sanity check on queue memory */ @@ -15056,38 +15084,13 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, else wq_create_version = LPFC_Q_CREATE_VERSION_0; - switch (wq_create_version) { - case LPFC_Q_CREATE_VERSION_0: - switch (wq->entry_size) { - default: - case 64: - /* Nothing to do, version 0 ONLY supports 64 byte */ - page = wq_create->u.request.page; - break; - case 128: - if (!(phba->sli4_hba.pc_sli4_params.wqsize & - LPFC_WQ_SZ128_SUPPORT)) { - status = -ERANGE; - goto out; - } - /* If we get here the HBA MUST also support V1 and - * we MUST use it - */ - bf_set(lpfc_mbox_hdr_version, &shdr->request, - LPFC_Q_CREATE_VERSION_1); - bf_set(lpfc_mbx_wq_create_wqe_count, - &wq_create->u.request_1, wq->entry_count); - bf_set(lpfc_mbx_wq_create_wqe_size, - &wq_create->u.request_1, - LPFC_WQ_WQE_SIZE_128); - bf_set(lpfc_mbx_wq_create_page_size, - &wq_create->u.request_1, - LPFC_WQ_PAGE_SIZE_4096); - page = wq_create->u.request_1.page; - break; - } - break; + if (phba->sli4_hba.pc_sli4_params.wqsize & LPFC_WQ_SZ128_SUPPORT) + wq_create_version = LPFC_Q_CREATE_VERSION_1; + else + wq_create_version = LPFC_Q_CREATE_VERSION_0; + + switch (wq_create_version) { case LPFC_Q_CREATE_VERSION_1: bf_set(lpfc_mbx_wq_create_wqe_count, &wq_create->u.request_1, wq->entry_count); @@ -15102,24 +15105,21 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, LPFC_WQ_WQE_SIZE_64); break; case 128: - if (!(phba->sli4_hba.pc_sli4_params.wqsize & - LPFC_WQ_SZ128_SUPPORT)) { - status = -ERANGE; - goto out; - } bf_set(lpfc_mbx_wq_create_wqe_size, &wq_create->u.request_1, LPFC_WQ_WQE_SIZE_128); break; } + /* Request DPP by default */ + bf_set(lpfc_mbx_wq_create_dpp_req, &wq_create->u.request_1, 1); bf_set(lpfc_mbx_wq_create_page_size, &wq_create->u.request_1, (wq->page_size / SLI4_PAGE_SIZE)); page = wq_create->u.request_1.page; break; default: - status = -ERANGE; - goto out; + page = wq_create->u.request.page; + break; } list_for_each_entry(dmabuf, &wq->page_list, list) { @@ -15143,52 +15143,120 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, status = -ENXIO; goto out; } - wq->queue_id = bf_get(lpfc_mbx_wq_create_q_id, &wq_create->u.response); + + if (wq_create_version == LPFC_Q_CREATE_VERSION_0) + wq->queue_id = bf_get(lpfc_mbx_wq_create_q_id, + &wq_create->u.response); + else + wq->queue_id = bf_get(lpfc_mbx_wq_create_v1_q_id, + &wq_create->u.response_1); + if (wq->queue_id == 0xFFFF) { status = -ENXIO; goto out; } - if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) { - wq->db_format = bf_get(lpfc_mbx_wq_create_db_format, - &wq_create->u.response); - if ((wq->db_format != LPFC_DB_LIST_FORMAT) && - (wq->db_format != LPFC_DB_RING_FORMAT)) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3265 WQ[%d] doorbell format not " - "supported: x%x\n", wq->queue_id, - wq->db_format); - status = -EINVAL; - goto out; - } - pci_barset = bf_get(lpfc_mbx_wq_create_bar_set, - &wq_create->u.response); - bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset); - if (!bar_memmap_p) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3263 WQ[%d] failed to memmap pci " - "barset:x%x\n", wq->queue_id, - pci_barset); - status = -ENOMEM; - goto out; - } - db_offset = wq_create->u.response.doorbell_offset; - if ((db_offset != LPFC_ULP0_WQ_DOORBELL) && - (db_offset != LPFC_ULP1_WQ_DOORBELL)) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3252 WQ[%d] doorbell offset not " - "supported: x%x\n", wq->queue_id, - db_offset); - status = -EINVAL; - goto out; - } - wq->db_regaddr = bar_memmap_p + db_offset; - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "3264 WQ[%d]: barset:x%x, offset:x%x, " - "format:x%x\n", wq->queue_id, pci_barset, - db_offset, wq->db_format); + + wq->db_format = LPFC_DB_LIST_FORMAT; + if (wq_create_version == LPFC_Q_CREATE_VERSION_0) { + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) { + wq->db_format = bf_get(lpfc_mbx_wq_create_db_format, + &wq_create->u.response); + if ((wq->db_format != LPFC_DB_LIST_FORMAT) && + (wq->db_format != LPFC_DB_RING_FORMAT)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3265 WQ[%d] doorbell format " + "not supported: x%x\n", + wq->queue_id, wq->db_format); + status = -EINVAL; + goto out; + } + pci_barset = bf_get(lpfc_mbx_wq_create_bar_set, + &wq_create->u.response); + bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, + pci_barset); + if (!bar_memmap_p) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3263 WQ[%d] failed to memmap " + "pci barset:x%x\n", + wq->queue_id, pci_barset); + status = -ENOMEM; + goto out; + } + db_offset = wq_create->u.response.doorbell_offset; + if ((db_offset != LPFC_ULP0_WQ_DOORBELL) && + (db_offset != LPFC_ULP1_WQ_DOORBELL)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3252 WQ[%d] doorbell offset " + "not supported: x%x\n", + wq->queue_id, db_offset); + status = -EINVAL; + goto out; + } + wq->db_regaddr = bar_memmap_p + db_offset; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3264 WQ[%d]: barset:x%x, offset:x%x, " + "format:x%x\n", wq->queue_id, + pci_barset, db_offset, wq->db_format); + } else + wq->db_regaddr = phba->sli4_hba.WQDBregaddr; } else { - wq->db_format = LPFC_DB_LIST_FORMAT; - wq->db_regaddr = phba->sli4_hba.WQDBregaddr; + /* Check if DPP was honored by the firmware */ + wq->dpp_enable = bf_get(lpfc_mbx_wq_create_dpp_rsp, + &wq_create->u.response_1); + if (wq->dpp_enable) { + pci_barset = bf_get(lpfc_mbx_wq_create_v1_bar_set, + &wq_create->u.response_1); + bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, + pci_barset); + if (!bar_memmap_p) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3267 WQ[%d] failed to memmap " + "pci barset:x%x\n", + wq->queue_id, pci_barset); + status = -ENOMEM; + goto out; + } + db_offset = wq_create->u.response_1.doorbell_offset; + wq->db_regaddr = bar_memmap_p + db_offset; + wq->dpp_id = bf_get(lpfc_mbx_wq_create_dpp_id, + &wq_create->u.response_1); + dpp_barset = bf_get(lpfc_mbx_wq_create_dpp_bar, + &wq_create->u.response_1); + bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, + dpp_barset); + if (!bar_memmap_p) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3268 WQ[%d] failed to memmap " + "pci barset:x%x\n", + wq->queue_id, dpp_barset); + status = -ENOMEM; + goto out; + } + dpp_offset = wq_create->u.response_1.dpp_offset; + wq->dpp_regaddr = bar_memmap_p + dpp_offset; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3271 WQ[%d]: barset:x%x, offset:x%x, " + "dpp_id:x%x dpp_barset:x%x " + "dpp_offset:x%x\n", + wq->queue_id, pci_barset, db_offset, + wq->dpp_id, dpp_barset, dpp_offset); + + /* Enable combined writes for DPP aperture */ + pg_addr = (unsigned long)(wq->dpp_regaddr) & PAGE_MASK; +#ifdef CONFIG_X86 + rc = set_memory_wc(pg_addr, 1); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3272 Cannot setup Combined " + "Write on WQ[%d] - disable DPP\n", + wq->queue_id); + phba->cfg_enable_dpp = 0; + } +#else + phba->cfg_enable_dpp = 0; +#endif + } else + wq->db_regaddr = phba->sli4_hba.WQDBregaddr; } wq->pring = kzalloc(sizeof(struct lpfc_sli_ring), GFP_KERNEL); if (wq->pring == NULL) { diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 33838b4b28d9..708167b309bd 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -180,6 +180,10 @@ struct lpfc_queue { uint8_t q_flag; #define HBA_NVMET_WQFULL 0x1 /* We hit WQ Full condition for NVMET */ void __iomem *db_regaddr; + uint16_t dpp_enable; + uint16_t dpp_id; + void __iomem *dpp_regaddr; + /* For q stats */ uint32_t q_cnt_1; uint32_t q_cnt_2; @@ -524,11 +528,17 @@ struct lpfc_vector_map_info { /* SLI4 HBA data structure entries */ struct lpfc_sli4_hba { void __iomem *conf_regs_memmap_p; /* Kernel memory mapped address for - PCI BAR0, config space registers */ + * config space registers + */ void __iomem *ctrl_regs_memmap_p; /* Kernel memory mapped address for - PCI BAR1, control registers */ + * control registers + */ void __iomem *drbl_regs_memmap_p; /* Kernel memory mapped address for - PCI BAR2, doorbell registers */ + * doorbell registers + */ + void __iomem *dpp_regs_memmap_p; /* Kernel memory mapped address for + * dpp registers + */ union { struct { /* IF Type 0, BAR 0 PCI cfg space reg mem map */ -- 2.13.1