Use the .setup_affinity() callback to recalculate the number of queues, and build the IRQ affinity masks with the help of irq_build_affinity(). Then nvme_setup_irqs() gets simplified a lot. Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- drivers/nvme/host/pci.c | 97 ++++++++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 49 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9bc585415d9b..24496de0a29b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2078,17 +2078,58 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues) } } +static int nvme_setup_affinity(const struct irq_affinity *affd, + struct irq_affinity_desc *masks, + unsigned int nmasks) +{ + struct nvme_dev *dev = affd->priv; + int affvecs = nmasks - affd->pre_vectors - affd->post_vectors; + int curvec, usedvecs; + int i; + + nvme_calc_io_queues(dev, nmasks); + + /* Fill out vectors at the beginning that don't need affinity */ + for (curvec = 0; curvec < affd->pre_vectors; curvec++) + cpumask_copy(&masks[curvec].mask, cpu_possible_mask); + + for (i = 0, usedvecs = 0; i < HCTX_TYPE_POLL; i++) { + int this_vecs = dev->io_queues[i]; + int ret; + + if (!this_vecs) + break; + + ret = irq_build_affinity(affd, curvec, this_vecs, curvec, + masks, nmasks); + if (ret) + return ret; + + curvec += this_vecs; + usedvecs += this_vecs; + } + + /* Fill out vectors at the end that don't need affinity */ + curvec = affd->pre_vectors + min(usedvecs, affvecs); + for (; curvec < nmasks; curvec++) + cpumask_copy(&masks[curvec].mask, cpu_possible_mask); + + /* Mark the managed interrupts */ + for (i = affd->pre_vectors; i < nmasks - affd->post_vectors; i++) + masks[i].is_managed = 1; + + return 0; +} + static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) { struct pci_dev *pdev = to_pci_dev(dev->dev); - int irq_sets[2]; struct irq_affinity affd = { .pre_vectors = 1, - .nr_sets = ARRAY_SIZE(irq_sets), - .sets = 
irq_sets, + .setup_affinity = nvme_setup_affinity, + .priv = dev, }; - int result = 0; - unsigned int irq_queues, this_p_queues; + int result, irq_queues, this_p_queues; /* * Poll queues don't need interrupts, but we need at least one IO @@ -2103,50 +2144,8 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) } dev->io_queues[HCTX_TYPE_POLL] = this_p_queues; - /* - * For irq sets, we have to ask for minvec == maxvec. This passes - * any reduction back to us, so we can adjust our queue counts and - * IRQ vector needs. - */ - do { - nvme_calc_io_queues(dev, irq_queues); - irq_sets[0] = dev->io_queues[HCTX_TYPE_DEFAULT]; - irq_sets[1] = dev->io_queues[HCTX_TYPE_READ]; - if (!irq_sets[1]) - affd.nr_sets = 1; - - /* - * If we got a failure and we're down to asking for just - * 1 + 1 queues, just ask for a single vector. We'll share - * that between the single IO queue and the admin queue. - * Otherwise, we assign one independent vector to admin queue. - */ - if (irq_queues > 1) - irq_queues = irq_sets[0] + irq_sets[1] + 1; - - result = pci_alloc_irq_vectors_affinity(pdev, irq_queues, - irq_queues, - PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); - - /* - * Need to reduce our vec counts. If we get ENOSPC, the - * platform should support mulitple vecs, we just need - * to decrease our ask. If we get EINVAL, the platform - * likely does not. Back down to ask for just one vector. - */ - if (result == -ENOSPC) { - irq_queues--; - if (!irq_queues) - return result; - continue; - } else if (result == -EINVAL) { - irq_queues = 1; - continue; - } else if (result <= 0) - return -EIO; - break; - } while (1); - + result = pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues, + PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); return result; } -- 2.9.5