From: Tsai Sung-Fu <danielsftsai@xxxxxxxxxx> Setup the struct irq_affinity at EP side, and passing that as 1 of the function parameter when endpoint calls pci_alloc_irq_vectors_affinity, this could help to setup non-default irq_affinity for target irq (end up in irq_desc->irq_common_data.affinity), and we can make use of that to separate msi vector out to bind to other msi controller. In current design, we have 8 msi controllers, and each of them own up to 32 msi vectors, layout as below msi_controller0 <- msi_vector0 ~ 31 msi_controller1 <- msi_vector32 ~ 63 msi_controller2 <- msi_vector64 ~ 95 . . . msi_controller7 <- msi_vector224 ~ 255 dw_pcie_irq_domain_alloc is allocating msi vector number in a contiguous fashion, that would end up those allocated msi vectors all handled by the same msi_controller, which all of them would have irq affinity in equal. To separate out to different CPU, we need to distribute msi vectors to different msi_controller, which require to allocate the msi vector in a stride fashion. To do that, the CL make use the cpumask_var_t setup by the endpoint, compare against to see if the affinities are the same, if they are, bind to msi controller which previously allocated msi vector goes to, if they are not, assign to new msi controller. Signed-off-by: Tsai Sung-Fu <danielsftsai@xxxxxxxxxx> --- .../pci/controller/dwc/pcie-designware-host.c | 80 +++++++++++++++---- drivers/pci/controller/dwc/pcie-designware.h | 2 + 2 files changed, 67 insertions(+), 15 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index d2291c3ceb8be..192d05c473b3b 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -181,25 +181,75 @@ static int dw_pcie_irq_domain_alloc(struct irq_domain *domain, void *args) { struct dw_pcie_rp *pp = domain->host_data; - unsigned long flags; - u32 i; - int bit; + const struct cpumask *mask; + unsigned long flags, index, start, size; + int irq, ctrl, p_irq, *msi_vec_index; + unsigned int controller_count = (pp->num_vectors / MAX_MSI_IRQS_PER_CTRL); + + /* + * All IRQs on a given controller will use the same parent interrupt, + * and therefore the same CPU affinity. We try to honor any CPU spreading + * requests by assigning distinct affinity masks to distinct vectors. + * So instead of always allocating the msi vectors in a contiguous fashion, + * the algo here honor whoever comes first can bind the msi controller to + * its irq affinity mask, or compare its cpumask against + * currently recorded to decide if binding to this msi controller. + */ + + msi_vec_index = kcalloc(nr_irqs, sizeof(*msi_vec_index), GFP_KERNEL); + if (!msi_vec_index) + return -ENOMEM; raw_spin_lock_irqsave(&pp->lock, flags); - bit = bitmap_find_free_region(pp->msi_irq_in_use, pp->num_vectors, - order_base_2(nr_irqs)); + for (irq = 0; irq < nr_irqs; irq++) { + mask = irq_get_affinity_mask(virq + irq); + for (ctrl = 0; ctrl < controller_count; ctrl++) { + start = ctrl * MAX_MSI_IRQS_PER_CTRL; + size = start + MAX_MSI_IRQS_PER_CTRL; + if (find_next_bit(pp->msi_irq_in_use, size, start) >= size) { + cpumask_copy(&pp->msi_ctrl_to_cpu[ctrl], mask); + break; + } - raw_spin_unlock_irqrestore(&pp->lock, flags); + if (cpumask_equal(&pp->msi_ctrl_to_cpu[ctrl], mask) && + find_next_zero_bit(pp->msi_irq_in_use, size, start) < size) + break; + } - if (bit < 0) - return -ENOSPC; + /* + * no msi controller matches, we would error return (no space) and + * clear those previously allocated bit, because all those msi vectors + * didn't really successfully allocated, so we shouldn't occupied that + * position in the bitmap in case other endpoint may still make use of + * those. An extra step when choosing to not allocate in contiguous + * fashion. + */ + if (ctrl == controller_count) { + for (p_irq = irq - 1; p_irq >= 0; p_irq--) + bitmap_clear(pp->msi_irq_in_use, msi_vec_index[p_irq], 1); + raw_spin_unlock_irqrestore(&pp->lock, flags); + kfree(msi_vec_index); + return -ENOSPC; + } + + index = bitmap_find_next_zero_area(pp->msi_irq_in_use, + size, + start, + 1, + 0); + bitmap_set(pp->msi_irq_in_use, index, 1); + msi_vec_index[irq] = index; + } - for (i = 0; i < nr_irqs; i++) - irq_domain_set_info(domain, virq + i, bit + i, + raw_spin_unlock_irqrestore(&pp->lock, flags); + + for (irq = 0; irq < nr_irqs; irq++) + irq_domain_set_info(domain, virq + irq, msi_vec_index[irq], pp->msi_irq_chip, pp, handle_edge_irq, NULL, NULL); + kfree(msi_vec_index); return 0; } @@ -207,15 +257,15 @@ static int dw_pcie_irq_domain_alloc(struct irq_domain *domain, static void dw_pcie_irq_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { - struct irq_data *d = irq_domain_get_irq_data(domain, virq); + struct irq_data *d; struct dw_pcie_rp *pp = domain->host_data; unsigned long flags; raw_spin_lock_irqsave(&pp->lock, flags); - - bitmap_release_region(pp->msi_irq_in_use, d->hwirq, - order_base_2(nr_irqs)); - + for (int i = 0; i < nr_irqs; i++) { + d = irq_domain_get_irq_data(domain, virq + i); + bitmap_clear(pp->msi_irq_in_use, d->hwirq, 1); + } raw_spin_unlock_irqrestore(&pp->lock, flags); } diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h index 347ab74ac35aa..95629b37a238e 100644 --- a/drivers/pci/controller/dwc/pcie-designware.h +++ b/drivers/pci/controller/dwc/pcie-designware.h @@ -14,6 +14,7 @@ #include <linux/bitfield.h> #include <linux/bitops.h> #include <linux/clk.h> +#include <linux/cpumask.h> #include <linux/dma-mapping.h> #include <linux/dma/edma.h> #include <linux/gpio/consumer.h> @@ -373,6 +374,7 @@ struct dw_pcie_rp { struct irq_chip *msi_irq_chip; u32 num_vectors; u32 irq_mask[MAX_MSI_CTRLS]; + struct cpumask msi_ctrl_to_cpu[MAX_MSI_CTRLS]; struct pci_host_bridge *bridge; raw_spinlock_t lock; DECLARE_BITMAP(msi_irq_in_use, MAX_MSI_IRQS); -- 2.48.0.rc2.279.g1de40edade-goog