On Thu, Oct 24, 2019 at 10:21:21PM +0800, John Garry wrote: > Since we're not ready to expose mutliple queues to the upper layer always > due to CPU hotplug issue, add a new interim experimental command line > option to support it. > > We still need to keep supporting auto_affine_msi_experimental, since > people are now replying the performance it provides, even though it is > unsafe. > > If auto_affine_msi_experimental and expose_mq_experimental are both set, > then auto_affine_msi_experimental takes preference. > > Signed-off-by: John Garry <john.garry@xxxxxxxxxx> > --- > drivers/scsi/hisi_sas/hisi_sas.h | 2 + > drivers/scsi/hisi_sas/hisi_sas_main.c | 55 ++++++++++++++++---------- > drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 51 +++++++++++++++++++++--- > 3 files changed, 83 insertions(+), 25 deletions(-) > > diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h > index 4eb8f1c53f78..884f2426d753 100644 > --- a/drivers/scsi/hisi_sas/hisi_sas.h > +++ b/drivers/scsi/hisi_sas/hisi_sas.h > @@ -8,6 +8,8 @@ > #define _HISI_SAS_H_ > > #include <linux/acpi.h> > +#include <linux/blk-mq.h> > +#include <linux/blk-mq-pci.h> > #include <linux/clk.h> > #include <linux/debugfs.h> > #include <linux/dmapool.h> > diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c > index 53802c1cc1d0..c8c96a46acfd 100644 > --- a/drivers/scsi/hisi_sas/hisi_sas_main.c > +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c > @@ -389,9 +389,11 @@ static int hisi_sas_task_prep(struct sas_task *task, > struct hisi_sas_slot *slot; > struct hisi_sas_cmd_hdr *cmd_hdr_base; > struct asd_sas_port *sas_port = device->port; > + struct Scsi_Host *shost = hisi_hba->shost; > struct device *dev = hisi_hba->dev; > int dlvry_queue_slot, dlvry_queue, rc, slot_idx; > int n_elem = 0, n_elem_dif = 0, n_elem_req = 0; > + struct scsi_cmnd *scmd = NULL; > struct hisi_sas_dq *dq; > unsigned long flags; > int wr_q_index; > @@ -407,13 +409,38 @@ static int 
hisi_sas_task_prep(struct sas_task *task, > return -ECOMM; > } > > - if (hisi_hba->reply_map) { > - int cpu = raw_smp_processor_id(); > - unsigned int dq_index = hisi_hba->reply_map[cpu]; > + if (task->uldd_task) { > + struct ata_queued_cmd *qc; > > - *dq_pointer = dq = &hisi_hba->dq[dq_index]; > - } else { > + if (dev_is_sata(device)) { > + qc = task->uldd_task; > + scmd = qc->scsicmd; > + } else { > + scmd = task->uldd_task; > + } > + } > + > + /* We have to move to just a single mode: expose multiple queues */ > + if (!hisi_hba->reply_map && !shost->nr_hw_queues) { > *dq_pointer = dq = sas_dev->dq; > + } else { > + if (hisi_hba->reply_map) { > + int cpu = raw_smp_processor_id(); > + unsigned int dq_index = hisi_hba->reply_map[cpu]; > + > + *dq_pointer = dq = &hisi_hba->dq[dq_index]; > + } else { > + if (scmd) { > + unsigned int dq_index; > + u32 blk_tag; > + > + blk_tag = blk_mq_unique_tag(scmd->request); > + dq_index = blk_mq_unique_tag_to_hwq(blk_tag); > + *dq_pointer = dq = &hisi_hba->dq[dq_index]; > + } else { > + *dq_pointer = dq = sas_dev->dq; > + } > + } > } > > port = to_hisi_sas_port(sas_port); > @@ -438,22 +465,10 @@ static int hisi_sas_task_prep(struct sas_task *task, > } > > if (hisi_hba->hw->slot_index_alloc) > - rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device, NULL); > - else { > - struct scsi_cmnd *scsi_cmnd = NULL; > - > - if (task->uldd_task) { > - struct ata_queued_cmd *qc; > + rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device, scmd); > + else > + rc = hisi_sas_slot_index_alloc(hisi_hba, scmd); > > - if (dev_is_sata(device)) { > - qc = task->uldd_task; > - scsi_cmnd = qc->scsicmd; > - } else { > - scsi_cmnd = task->uldd_task; > - } > - } > - rc = hisi_sas_slot_index_alloc(hisi_hba, scsi_cmnd); > - } > if (rc < 0) > goto err_out_dif_dma_unmap; > > diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c > index 29119d0b27a7..03ba0416f910 100644 > --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c > +++ 
b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c > @@ -512,6 +512,11 @@ module_param(auto_affine_msi_experimental, bool, 0444); > MODULE_PARM_DESC(auto_affine_msi_experimental, "Enable auto-affinity of MSI IRQs as experimental:\n" > "default is off"); > > +static bool expose_mq_experimental; > +module_param(expose_mq_experimental, bool, 0444); > +MODULE_PARM_DESC(expose_mq_experimental, "Expose multiple hw queues to upper layer as experimental:\n" > + "default is off"); > + > static u32 hisi_sas_read32(struct hisi_hba *hisi_hba, u32 off) > { > void __iomem *regs = hisi_hba->regs + off; > @@ -558,6 +563,11 @@ static u32 hisi_sas_phy_read32(struct hisi_hba *hisi_hba, > > static int bitmaps_alloc_v3_hw(struct hisi_hba *hisi_hba) > { > + if (expose_mq_experimental) > + return sbitmap_init_node(&hisi_hba->slot_index_tags, > + HISI_SAS_MAX_COMMANDS, -1, > + GFP_KERNEL, > + dev_to_node(hisi_hba->dev)); > return sbitmap_init_node(&hisi_hba->slot_index_tags, > HISI_SAS_UNRESERVED_IPTT, -1, > GFP_KERNEL, dev_to_node(hisi_hba->dev)); > @@ -570,6 +580,10 @@ static int slot_index_alloc_v3_hw(struct hisi_hba *hisi_hba, > struct sbitmap *slot_index_tags = &hisi_hba->slot_index_tags; > int index; > > + if (expose_mq_experimental) > + return sbitmap_get(slot_index_tags, > + hisi_hba->sbitmap_alloc_hint, false); > + > if (scmd) > return scmd->request->tag; > > @@ -583,7 +597,10 @@ static void slot_index_free_v3_hw(struct hisi_hba *hisi_hba, int slot_idx) > { > struct sbitmap *slot_index_tags = &hisi_hba->slot_index_tags; > > - if (slot_idx >= HISI_SAS_UNRESERVED_IPTT) > + if (expose_mq_experimental) { > + sbitmap_clear_bit(slot_index_tags, slot_idx); > + hisi_hba->sbitmap_alloc_hint = slot_idx; > + } else if (slot_idx >= HISI_SAS_UNRESERVED_IPTT) > sbitmap_clear_bit(slot_index_tags, > slot_idx - HISI_SAS_UNRESERVED_IPTT); > } > @@ -2414,8 +2431,9 @@ static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba) > struct device *dev = hisi_hba->dev; > int vectors; > int max_msi = 
HISI_SAS_MSI_COUNT_V3_HW, min_msi; > + struct Scsi_Host *shost = hisi_hba->shost; > > - if (auto_affine_msi_experimental) { > + if (auto_affine_msi_experimental || expose_mq_experimental) { > struct irq_affinity desc = { > .pre_vectors = BASE_VECTORS_V3_HW, > }; > @@ -2434,7 +2452,9 @@ static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba) > &desc); > if (vectors < 0) > return -ENOENT; > - setup_reply_map_v3_hw(hisi_hba, vectors - BASE_VECTORS_V3_HW); > + if (auto_affine_msi_experimental) > + setup_reply_map_v3_hw(hisi_hba, > + vectors - BASE_VECTORS_V3_HW); > } else { > min_msi = max_msi; > vectors = pci_alloc_irq_vectors(hisi_hba->pci_dev, min_msi, > @@ -2444,6 +2464,9 @@ static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba) > } > > hisi_hba->cq_nvecs = vectors - BASE_VECTORS_V3_HW; > + if (expose_mq_experimental) > + shost->nr_hw_queues = hisi_hba->cq_nvecs; > + > return 0; > } > > @@ -3096,6 +3119,17 @@ static int debugfs_set_bist_v3_hw(struct hisi_hba *hisi_hba, bool enable) > return 0; > } > > +static int hisi_sas_map_queues(struct Scsi_Host *shost) > +{ > + struct hisi_hba *hisi_hba = shost_priv(shost); > + struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; > + > + if (expose_mq_experimental) > + return blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev, > + BASE_VECTORS_V3_HW); > + return blk_mq_map_queues(qmap); > +} > + > static struct scsi_host_template sht_v3_hw = { > .name = DRV_NAME, > .module = THIS_MODULE, > @@ -3104,6 +3138,7 @@ static struct scsi_host_template sht_v3_hw = { > .slave_configure = hisi_sas_slave_configure, > .scan_finished = hisi_sas_scan_finished, > .scan_start = hisi_sas_scan_start, > + .map_queues = hisi_sas_map_queues, > .change_queue_depth = sas_change_queue_depth, > .bios_param = sas_bios_param, > .this_id = -1, > @@ -3265,8 +3300,14 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) > shost->max_lun = ~0; > shost->max_channel = 1; > shost->max_cmd_len = 16; > - 
shost->can_queue = HISI_SAS_UNRESERVED_IPTT; > - shost->cmd_per_lun = HISI_SAS_UNRESERVED_IPTT; > + > + if (expose_mq_experimental) { > + shost->can_queue = HISI_SAS_MAX_COMMANDS; > + shost->cmd_per_lun = HISI_SAS_MAX_COMMANDS; The above contradicts the current meaning of 'nr_hw_queues'; see the comment on Scsi_Host.nr_hw_queues: /* * In scsi-mq mode, the number of hardware queues supported by the LLD. * * Note: it is assumed that each hardware queue has a queue depth of * can_queue. In other words, the total queue depth per host * is nr_hw_queues * can_queue. */ Also, this implementation wastes too much memory. Thanks, Ming