[+cc Thomas, Marc] On Thu, Jan 31, 2019 at 11:56:49AM -0700, Logan Gunthorpe wrote: > For NTB devices, we want to be able to trigger MSI interrupts > through a memory window. In these cases we may want to use > more interrupts than the NTB PCI device has available in its MSI-X > table. > > We allow for this by creating a new 'virtual' interrupt. These > interrupts are allocated as usual but are not programmed into the > MSI-X table (as there may not be space for them). > > The MSI address and data will then handled through an NTB MSI library > introduced later in this series. > > Signed-off-by: Logan Gunthorpe <logang@xxxxxxxxxxxx> > Cc: Bjorn Helgaas <bhelgaas@xxxxxxxxxx> I assume you'll merge this along with the rest of the series, so: Acked-by: Bjorn Helgaas <bhelgaas@xxxxxxxxxx> Minor question and typo below. > --- > drivers/pci/msi.c | 51 +++++++++++++++++++++++++++++++++++++-------- > include/linux/msi.h | 1 + > include/linux/pci.h | 9 ++++++++ > 3 files changed, 52 insertions(+), 9 deletions(-) > > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c > index 4c0b47867258..145587da686c 100644 > --- a/drivers/pci/msi.c > +++ b/drivers/pci/msi.c > @@ -192,6 +192,9 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) > > static void __iomem *pci_msix_desc_addr(struct msi_desc *desc) > { > + if (desc->msi_attrib.is_virtual) > + return NULL; > + > return desc->mask_base + > desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; > } > @@ -206,14 +209,19 @@ static void __iomem *pci_msix_desc_addr(struct msi_desc *desc) > u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag) > { > u32 mask_bits = desc->masked; > + void __iomem *desc_addr; > > if (pci_msi_ignore_mask) > return 0; > + desc_addr = pci_msix_desc_addr(desc); > + if (!desc_addr) > + return 0; > > mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; > if (flag) > mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT; > - writel(mask_bits, pci_msix_desc_addr(desc) + PCI_MSIX_ENTRY_VECTOR_CTRL); > + > + 
writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL); > > return mask_bits; > } > @@ -273,6 +281,11 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) > if (entry->msi_attrib.is_msix) { > void __iomem *base = pci_msix_desc_addr(entry); > > + if (!base) { > + WARN_ON(1); > + return; > + } > + > msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); > msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); > msg->data = readl(base + PCI_MSIX_ENTRY_DATA); > @@ -303,6 +316,9 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) > } else if (entry->msi_attrib.is_msix) { > void __iomem *base = pci_msix_desc_addr(entry); > > + if (!base) > + goto skip; > + > writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); > writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); > writel(msg->data, base + PCI_MSIX_ENTRY_DATA); > @@ -327,6 +343,8 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) > msg->data); > } > } > + > +skip: > entry->msg = *msg; > } > > @@ -550,6 +568,7 @@ msi_setup_entry(struct pci_dev *dev, int nvec, const struct irq_affinity *affd) > > entry->msi_attrib.is_msix = 0; > entry->msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT); > + entry->msi_attrib.is_virtual = 0; > entry->msi_attrib.entry_nr = 0; > entry->msi_attrib.maskbit = !!(control & PCI_MSI_FLAGS_MASKBIT); > entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ > @@ -674,6 +693,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, > struct irq_affinity_desc *curmsk, *masks = NULL; > struct msi_desc *entry; > int ret, i; > + int vec_count = pci_msix_vec_count(dev); > > if (affd) > masks = irq_create_affinity_masks(nvec, affd); > @@ -696,6 +716,10 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, > entry->msi_attrib.entry_nr = entries[i].entry; > else > entry->msi_attrib.entry_nr = i; > + > + entry->msi_attrib.is_virtual = > + entry->msi_attrib.entry_nr >= vec_count; 
> + > entry->msi_attrib.default_irq = dev->irq; > entry->mask_base = base; > > @@ -714,12 +738,19 @@ static void msix_program_entries(struct pci_dev *dev, > { > struct msi_desc *entry; > int i = 0; > + void __iomem *desc_addr; > > for_each_pci_msi_entry(entry, dev) { > if (entries) > entries[i++].vector = entry->irq; > - entry->masked = readl(pci_msix_desc_addr(entry) + > - PCI_MSIX_ENTRY_VECTOR_CTRL); > + > + desc_addr = pci_msix_desc_addr(entry); > + if (desc_addr) > + entry->masked = readl(desc_addr + > + PCI_MSIX_ENTRY_VECTOR_CTRL); > + else > + entry->masked = 0; > + > msix_mask_irq(entry, 1); > } > } > @@ -932,7 +963,8 @@ int pci_msix_vec_count(struct pci_dev *dev) > EXPORT_SYMBOL(pci_msix_vec_count); > > static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, > - int nvec, const struct irq_affinity *affd) > + int nvec, const struct irq_affinity *affd, > + int flags) > { > int nr_entries; > int i, j; > @@ -943,7 +975,7 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, > nr_entries = pci_msix_vec_count(dev); > if (nr_entries < 0) > return nr_entries; > - if (nvec > nr_entries) > + if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL)) > return nr_entries; > > if (entries) { > @@ -1086,7 +1118,8 @@ EXPORT_SYMBOL(pci_enable_msi); > > static int __pci_enable_msix_range(struct pci_dev *dev, > struct msix_entry *entries, int minvec, > - int maxvec, const struct irq_affinity *affd) > + int maxvec, const struct irq_affinity *affd, > + int flags) > { > int rc, nvec = maxvec; > > @@ -1110,7 +1143,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, > return -ENOSPC; > } > > - rc = __pci_enable_msix(dev, entries, nvec, affd); > + rc = __pci_enable_msix(dev, entries, nvec, affd, flags); > if (rc == 0) > return nvec; > > @@ -1141,7 +1174,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, > int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, > int minvec, int maxvec) > { > - return 
__pci_enable_msix_range(dev, entries, minvec, maxvec, NULL); > + return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0); > } > EXPORT_SYMBOL(pci_enable_msix_range); > > @@ -1181,7 +1214,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, > > if (flags & PCI_IRQ_MSIX) { > msix_vecs = __pci_enable_msix_range(dev, NULL, min_vecs, > - max_vecs, affd); > + max_vecs, affd, flags); > if (msix_vecs > 0) > return msix_vecs; > } > diff --git a/include/linux/msi.h b/include/linux/msi.h > index 784fb52b9900..6458ab049852 100644 > --- a/include/linux/msi.h > +++ b/include/linux/msi.h > @@ -88,6 +88,7 @@ struct msi_desc { > __u8 multi_cap : 3; > __u8 maskbit : 1; > __u8 is_64 : 1; > + __u8 is_virtual : 1; You did the right thing by using the same style as what's already here, but does anybody know why we are using __u8 and __u16 here? Those typedefs are in include/uapi/asm-generic/int-l64.h, which suggests they're for things exported to user space, but I don't think that's the case here, so I'm wondering if we could someday replace these with u8 and u16. Obviously that wouldn't be part of *this* series. > __u16 entry_nr; > unsigned default_irq; > } msi_attrib; > diff --git a/include/linux/pci.h b/include/linux/pci.h > index 65f1d8c2f082..ce0815c2c498 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -1352,6 +1352,15 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode, > #define PCI_IRQ_MSI (1 << 1) /* Allow MSI interrupts */ > #define PCI_IRQ_MSIX (1 << 2) /* Allow MSI-X interrupts */ > #define PCI_IRQ_AFFINITY (1 << 3) /* Auto-assign affinity */ > + > +/* > + * Virtual interrupts allow for more interrupts to be allocated > + * than the device has interrupts for. These are not programmed > + * into the devices MSI-X table and must be handled by some s/devices/device's/ > + * other driver means. 
> + */ > +#define PCI_IRQ_VIRTUAL (1 << 4) > + > #define PCI_IRQ_ALL_TYPES \ > (PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX) > > -- > 2.19.0 >