Change the sli4 queue creation code to use NUMA-node-aware memory allocation, selecting the node from the CPU each queue will be associated with. Signed-off-by: Dick Kennedy <dick.kennedy@xxxxxxxxxxxx> Signed-off-by: James Smart <jsmart2021@xxxxxxxxx> --- drivers/scsi/lpfc/lpfc_init.c | 72 ++++++++++++++++++++++++++----------------- drivers/scsi/lpfc/lpfc_sli.c | 10 +++--- drivers/scsi/lpfc/lpfc_sli4.h | 6 ++-- 3 files changed, 53 insertions(+), 35 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 212adc9d2c56..2a51146661aa 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -8631,10 +8631,12 @@ static int lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx) { struct lpfc_queue *qdesc; + int cpu; + cpu = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ); qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE, phba->sli4_hba.cq_esize, - LPFC_CQE_EXP_COUNT); + LPFC_CQE_EXP_COUNT, cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0508 Failed allocate fast-path NVME CQ (%d)\n", @@ -8643,11 +8645,12 @@ lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx) } qdesc->qe_valid = 1; qdesc->hdwq = wqidx; - qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ); + qdesc->chann = cpu; phba->sli4_hba.hdwq[wqidx].nvme_cq = qdesc; qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE, - LPFC_WQE128_SIZE, LPFC_WQE_EXP_COUNT); + LPFC_WQE128_SIZE, LPFC_WQE_EXP_COUNT, + cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0509 Failed allocate fast-path NVME WQ (%d)\n", @@ -8666,18 +8669,20 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx) { struct lpfc_queue *qdesc; uint32_t wqesize; + int cpu; + cpu = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ); /* Create Fast Path FCP CQs */ if (phba->enab_exp_wqcq_pages) /* Increase the CQ size when WQEs contain an embedded cdb */ qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE, phba->sli4_hba.cq_esize, -
LPFC_CQE_EXP_COUNT); + LPFC_CQE_EXP_COUNT, cpu); else qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.cq_esize, - phba->sli4_hba.cq_ecount); + phba->sli4_hba.cq_ecount, cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0499 Failed allocate fast-path FCP CQ (%d)\n", wqidx); @@ -8685,7 +8690,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx) } qdesc->qe_valid = 1; qdesc->hdwq = wqidx; - qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ); + qdesc->chann = cpu; phba->sli4_hba.hdwq[wqidx].fcp_cq = qdesc; /* Create Fast Path FCP WQs */ @@ -8695,11 +8700,11 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx) LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize; qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE, wqesize, - LPFC_WQE_EXP_COUNT); + LPFC_WQE_EXP_COUNT, cpu); } else qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.wq_esize, - phba->sli4_hba.wq_ecount); + phba->sli4_hba.wq_ecount, cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -8732,7 +8737,7 @@ int lpfc_sli4_queue_create(struct lpfc_hba *phba) { struct lpfc_queue *qdesc; - int idx, eqidx; + int idx, eqidx, cpu; struct lpfc_sli4_hdw_queue *qp; struct lpfc_eq_intr_info *eqi; @@ -8819,13 +8824,15 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) /* Create HBA Event Queues (EQs) */ for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { + /* determine EQ affinity */ + eqidx = lpfc_find_eq_handle(phba, idx); + cpu = lpfc_find_cpu_handle(phba, eqidx, LPFC_FIND_BY_EQ); /* * If there are more Hardware Queues than available - * CQs, multiple Hardware Queues may share a common EQ. + * EQs, multiple Hardware Queues may share a common EQ. 
*/ if (idx >= phba->cfg_irq_chann) { /* Share an existing EQ */ - eqidx = lpfc_find_eq_handle(phba, idx); phba->sli4_hba.hdwq[idx].hba_eq = phba->sli4_hba.hdwq[eqidx].hba_eq; continue; @@ -8833,7 +8840,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) /* Create an EQ */ qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.eq_esize, - phba->sli4_hba.eq_ecount); + phba->sli4_hba.eq_ecount, cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0497 Failed allocate EQ (%d)\n", idx); @@ -8843,9 +8850,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) qdesc->hdwq = idx; /* Save the CPU this EQ is affinitised to */ - eqidx = lpfc_find_eq_handle(phba, idx); - qdesc->chann = lpfc_find_cpu_handle(phba, eqidx, - LPFC_FIND_BY_EQ); + qdesc->chann = cpu; phba->sli4_hba.hdwq[idx].hba_eq = qdesc; qdesc->last_cpu = qdesc->chann; eqi = per_cpu_ptr(phba->sli4_hba.eq_info, qdesc->last_cpu); @@ -8868,11 +8873,14 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) if (phba->nvmet_support) { for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) { + cpu = lpfc_find_cpu_handle(phba, idx, + LPFC_FIND_BY_HDWQ); qdesc = lpfc_sli4_queue_alloc( phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.cq_esize, - phba->sli4_hba.cq_ecount); + phba->sli4_hba.cq_ecount, + cpu); if (!qdesc) { lpfc_printf_log( phba, KERN_ERR, LOG_INIT, @@ -8882,7 +8890,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) } qdesc->qe_valid = 1; qdesc->hdwq = idx; - qdesc->chann = idx; + qdesc->chann = cpu; phba->sli4_hba.nvmet_cqset[idx] = qdesc; } } @@ -8892,10 +8900,11 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) * Create Slow Path Completion Queues (CQs) */ + cpu = lpfc_find_cpu_handle(phba, 0, LPFC_FIND_BY_EQ); /* Create slow-path Mailbox Command Complete Queue */ qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.cq_esize, - phba->sli4_hba.cq_ecount); + phba->sli4_hba.cq_ecount, cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0500 Failed allocate slow-path 
mailbox CQ\n"); @@ -8907,7 +8916,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) /* Create slow-path ELS Complete Queue */ qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.cq_esize, - phba->sli4_hba.cq_ecount); + phba->sli4_hba.cq_ecount, cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0501 Failed allocate slow-path ELS CQ\n"); @@ -8926,7 +8935,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.mq_esize, - phba->sli4_hba.mq_ecount); + phba->sli4_hba.mq_ecount, cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0505 Failed allocate slow-path MQ\n"); @@ -8942,7 +8951,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) /* Create slow-path ELS Work Queue */ qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.wq_esize, - phba->sli4_hba.wq_ecount); + phba->sli4_hba.wq_ecount, cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0504 Failed allocate slow-path ELS WQ\n"); @@ -8956,7 +8965,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) /* Create NVME LS Complete Queue */ qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.cq_esize, - phba->sli4_hba.cq_ecount); + phba->sli4_hba.cq_ecount, cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "6079 Failed allocate NVME LS CQ\n"); @@ -8969,7 +8978,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) /* Create NVME LS Work Queue */ qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.wq_esize, - phba->sli4_hba.wq_ecount); + phba->sli4_hba.wq_ecount, cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "6080 Failed allocate NVME LS WQ\n"); @@ -8987,7 +8996,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) /* Create Receive Queue for header */ qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.rq_esize, - phba->sli4_hba.rq_ecount); + phba->sli4_hba.rq_ecount, cpu); if (!qdesc) { 
lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0506 Failed allocate receive HRQ\n"); @@ -8998,7 +9007,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) /* Create Receive Queue for data */ qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.rq_esize, - phba->sli4_hba.rq_ecount); + phba->sli4_hba.rq_ecount, cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0507 Failed allocate receive DRQ\n"); @@ -9009,11 +9018,14 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) && phba->nvmet_support) { for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) { + cpu = lpfc_find_cpu_handle(phba, idx, + LPFC_FIND_BY_HDWQ); /* Create NVMET Receive Queue for header */ qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.rq_esize, - LPFC_NVMET_RQE_DEF_COUNT); + LPFC_NVMET_RQE_DEF_COUNT, + cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3146 Failed allocate " @@ -9024,8 +9036,9 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) phba->sli4_hba.nvmet_mrq_hdr[idx] = qdesc; /* Only needed for header of RQ pair */ - qdesc->rqbp = kzalloc(sizeof(struct lpfc_rqb), - GFP_KERNEL); + qdesc->rqbp = kzalloc_node(sizeof(*qdesc->rqbp), + GFP_KERNEL, + cpu_to_node(cpu)); if (qdesc->rqbp == NULL) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "6131 Failed allocate " @@ -9040,7 +9053,8 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.rq_esize, - LPFC_NVMET_RQE_DEF_COUNT); + LPFC_NVMET_RQE_DEF_COUNT, + cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3156 Failed allocate " diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 5b630643d950..5f556f8bc6fb 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -14496,6 +14496,7 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue) * @page_size: The size of a queue page * @entry_size: The size of each queue entry for this 
queue. * @entry count: The number of entries that this queue will handle. + * @cpu: The cpu that will primarily utilize this queue. * * This function allocates a queue structure and the DMAable memory used for * the host resident queue. This function must be called before creating the @@ -14503,7 +14504,7 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue) **/ struct lpfc_queue * lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size, - uint32_t entry_size, uint32_t entry_count) + uint32_t entry_size, uint32_t entry_count, int cpu) { struct lpfc_queue *queue; struct lpfc_dmabuf *dmabuf; @@ -14519,8 +14520,8 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size, if (pgcnt > phba->sli4_hba.pc_sli4_params.wqpcnt) pgcnt = phba->sli4_hba.pc_sli4_params.wqpcnt; - queue = kzalloc(sizeof(struct lpfc_queue) + - (sizeof(void *) * pgcnt), GFP_KERNEL); + queue = kzalloc_node(sizeof(*queue) + (sizeof(void *) * pgcnt), + GFP_KERNEL, cpu_to_node(cpu)); if (!queue) return NULL; @@ -14543,7 +14544,8 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size, queue->phba = phba; for (x = 0; x < queue->page_count; x++) { - dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); + dmabuf = kzalloc_node(sizeof(*dmabuf), GFP_KERNEL, + dev_to_node(&phba->pcidev->dev)); if (!dmabuf) goto out_fail; dmabuf->virt = dma_zalloc_coherent(&phba->pcidev->dev, diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index b86ac85b65d0..bd5b5c3de35e 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -989,8 +989,10 @@ int lpfc_sli4_mbx_read_fcf_rec(struct lpfc_hba *, struct lpfcMboxq *, uint16_t); void lpfc_sli4_hba_reset(struct lpfc_hba *); -struct lpfc_queue *lpfc_sli4_queue_alloc(struct lpfc_hba *, uint32_t, - uint32_t, uint32_t); +struct lpfc_queue *lpfc_sli4_queue_alloc(struct lpfc_hba *phba, + uint32_t page_size, + uint32_t entry_size, + uint32_t entry_count, int cpu); void lpfc_sli4_queue_free(struct lpfc_queue *); 
int lpfc_eq_create(struct lpfc_hba *, struct lpfc_queue *, uint32_t); void lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq, -- 2.13.7