Re: [PATCH] vmd: Interrupt affinity pairing to child devices

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



$ git log --oneline drivers/pci/host/vmd.c
46a6561b29cb PCI: vmd: Remove IRQ affinity so we can allocate more IRQs
e2b1820bd5d0 PCI: vmd: Free up IRQs on suspend path
f2586c678cb2 PCI: vmd: Assign vector zero to all bridges
37d7f818a462 PCI: vmd: Reserve IRQ pre-vector for better affinity
0cb259c47a4d PCI: vmd: Move SRCU cleanup after bus, child device removal
575a144e7b30 PCI: vmd: Correct comment: VMD domains start at 0x10000, not 0x1000

Make yours match:

  PCI: vmd: <verb> ...

On Thu, Feb 01, 2018 at 03:23:05PM -0700, Keith Busch wrote:
> Performance for devices in VMD domains suffers in NUMA environments if
> we're not respecting the desired IRQ CPU affinity. This patch fixes
> that by creating managed affinity IRQ vectors for the VMD device, and
> then drivers registering their chained interrupts will be assigned the
> h/w IRQ that most closely matches their desired IRQ affinity. A tie is
> awarded to the lesser used vector.
> 
> Note, this only works for drivers that allocate their vectors with
> PCI_IRQ_AFFINITY. All other drivers will be assigned the least used
> vector without consideration for affinity.
> 
> Signed-off-by: Keith Busch <keith.busch@xxxxxxxxx>
> ---
>  drivers/pci/host/vmd.c | 80 ++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 65 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/pci/host/vmd.c b/drivers/pci/host/vmd.c
> index 930a8fa08bd6..ac84676e79a4 100644
> --- a/drivers/pci/host/vmd.c
> +++ b/drivers/pci/host/vmd.c
> @@ -166,10 +166,6 @@ static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info,
>  	return 0;
>  }
>  
> -/*
> - * XXX: We can be even smarter selecting the best IRQ once we solve the
> - * affinity problem.
> - */
>  static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc)
>  {
>  	int i, best = 1;
> @@ -188,24 +184,61 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d
>  	return &vmd->irqs[best];
>  }
>  
> +static struct vmd_irq_list *vmd_next_affinity_irq(struct vmd_dev *vmd,  const struct cpumask *dest)
> +{
> +	struct vmd_irq_list *irq = NULL;
> +	const struct cpumask *vmd_mask;
> +	unsigned long flags, match;
> +	int i, best = 0;
> +
> +	if (!dest || vmd->msix_count < 2)
> +		return NULL;
> +
> +	raw_spin_lock_irqsave(&list_lock, flags);
> +	for (i = 1; i < vmd->msix_count; i++) {
> +		struct cpumask tmp;
> +
> +		vmd_mask = pci_irq_get_affinity(vmd->dev, i);
> +		cpumask_and(&tmp, vmd_mask, dest);
> +		match = cpumask_weight(&tmp);
> +		if (match >= best) {
> +			if (match == best && irq &&
> +			    (vmd->irqs[i].count >= irq->count))
> +				continue;
> +			irq = &vmd->irqs[i];
> +			best = match;
> +		}
> +	}
> +	if (irq)
> +		irq->count++;
> +	raw_spin_unlock_irqrestore(&list_lock, flags);
> +
> +	return irq;
> +}
> +
>  static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
>  			unsigned int virq, irq_hw_number_t hwirq,
>  			msi_alloc_info_t *arg)
>  {
> -	struct msi_desc *desc = arg->desc;
> -	struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus);
> +	struct msi_desc *msidesc = arg->desc;
> +	struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(msidesc)->bus);
>  	struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL);
> -	unsigned int index, vector;
> +	struct irq_desc *desc = irq_to_desc(virq);
> +	unsigned int vector;
>  
>  	if (!vmdirq)
>  		return -ENOMEM;
>  
>  	INIT_LIST_HEAD(&vmdirq->node);
> -	vmdirq->irq = vmd_next_irq(vmd, desc);
> -	vmdirq->virq = virq;
> -	index = index_from_irqs(vmd, vmdirq->irq);
> -	vector = pci_irq_vector(vmd->dev, index);
>  
> +	if (desc && irqd_affinity_is_managed(&desc->irq_data))
> +		vmdirq->irq = vmd_next_affinity_irq(vmd,
> +					desc->irq_common_data.affinity);
> +	if (vmdirq->irq == NULL)
> +		vmdirq->irq = vmd_next_irq(vmd, msidesc);
> +
> +	vmdirq->virq = virq;
> +	vector = pci_irq_vector(vmd->dev, index_from_irqs(vmd, vmdirq->irq));
>  	irq_domain_set_info(domain, virq, vector, info->chip, vmdirq,
>  			    handle_untracked_irq, vmd, NULL);
>  	return 0;
> @@ -233,9 +266,11 @@ static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev,
>  	struct pci_dev *pdev = to_pci_dev(dev);
>  	struct vmd_dev *vmd = vmd_from_bus(pdev->bus);
>  
> -	if (nvec > vmd->msix_count)
> +	if (nvec > vmd->msix_count) {
> +		if (vmd->msix_count > 1)
> +			return vmd->msix_count - 1;
>  		return vmd->msix_count;
> -
> +	}
>  	memset(arg, 0, sizeof(*arg));
>  	return 0;
>  }
> @@ -663,6 +698,14 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
>  	struct vmd_dev *vmd;
>  	int i, err;
>  
> +	/*
> +	 * The first vector is reserved for special use, so start affinity at
> +	 * the second vector.
> +	 */
> +	struct irq_affinity affd = {
> +		.pre_vectors = 1,
> +	};
> +
>  	if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20))
>  		return -ENOMEM;
>  
> @@ -688,8 +731,15 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
>  	if (vmd->msix_count < 0)
>  		return -ENODEV;
>  
> -	vmd->msix_count = pci_alloc_irq_vectors(dev, 1, vmd->msix_count,
> -					PCI_IRQ_MSIX);
> +	/*
> +	 * Reserve remaining vectors that IRQ affinity won't be able to assign.
> +	 */
> +	if ((vmd->msix_count - 1) > cpumask_weight(cpu_present_mask))
> +		affd.post_vectors = vmd->msix_count -
> +					cpumask_weight(cpu_present_mask) - 1;
> +
> +	vmd->msix_count = pci_alloc_irq_vectors_affinity(dev, 1, vmd->msix_count,
> +					PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, &affd);
>  	if (vmd->msix_count < 0)
>  		return vmd->msix_count;
>  
> -- 
> 2.14.3
> 



[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux