PAMU (FSL IOMMU) has a concept of a primary window and subwindows. The primary window corresponds to the complete guest IOVA address space (including the MSI space); in IOMMU API terms this is called the geometry. The IOVA base of each subwindow is determined from the number of subwindows (configurable using the IOMMU API). The MSI I/O page must lie within the geometry and within the maximum number of supported subwindows, so the MSI I/O page is set up just after the guest memory IOVA space. This patch sets up the MSI IOVA base in the MSI subsystem for devices assigned through vfio, so that when an MSI message is composed, the configured IOVA is used. In this design, vfio makes the msi_set_iova() MSI API call to set up the IOVA for a device. The MSI layer keeps track of the IOVA base of every device under an MSI bank. When the MSI address and data are composed, this list is traversed: if the device is found in the list, it is in use by vfio and its IOVA base is taken from the list; otherwise the IOVA base is determined as before. This is a draft patch to describe the interface for setting up the IOVA in MSI (as Alex Williamson proposed earlier on a related patchset). For now I have bundled all changes into one patch to gather initial review comments on the design. I will split it into multiple logical patches once the design is accepted. 
Signed-off-by: Bharat Bhushan <bharat.bhushan@xxxxxxxxxxxxx> --- arch/powerpc/include/asm/machdep.h | 2 + arch/powerpc/kernel/msi.c | 10 ++++++ arch/powerpc/sysdev/fsl_msi.c | 64 ++++++++++++++++++++++++++++++++++++ arch/powerpc/sysdev/fsl_msi.h | 10 ++++- drivers/pci/msi.c | 12 +++++++ include/linux/pci.h | 8 ++++ 6 files changed, 104 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 8d1b787..e87b806 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -132,6 +132,8 @@ struct machdep_calls { /* Returns the requested region's address and size */ int (*msi_get_region)(int region_num, struct msi_region *region); + int (*msi_set_iova)(struct pci_dev *pdev, int region_num, + dma_addr_t iova, bool set); #endif void (*restart)(char *cmd); diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index 1a67787..e2bd555 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -13,6 +13,16 @@ #include <asm/machdep.h> +int arch_msi_set_iova(struct pci_dev *pdev, int region_num, + dma_addr_t iova, bool set) +{ + if (ppc_md.msi_set_iova) { + pr_debug("msi: Using platform get_region_count routine.\n"); + return ppc_md.msi_set_iova(pdev, region_num, iova, set); + } + return 0; +} + int arch_msi_get_region_count(void) { if (ppc_md.msi_get_region_count) { diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index eeebbf0..ad22d74 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -137,6 +137,46 @@ static int fsl_msi_get_region(int region_num, struct msi_region *region) return -ENODEV; } +static int fsl_msi_set_iova(struct pci_dev *pdev, int region_num, + dma_addr_t iova, bool set) +{ + struct fsl_msi *msi_data; + struct fsl_msi_device *device; + + list_for_each_entry(msi_data, &msi_head, list) { + if (msi_data->bank_index != region_num) + continue; + mutex_lock(&msi_data->lock); + if (set) { + 
list_for_each_entry(device, &msi_data->device_list, list) { + if (device->dev == pdev) { + device->iova = iova; + mutex_unlock(&msi_data->lock); + return 0; + } + } + + device = kzalloc(sizeof(struct fsl_msi_device), GFP_KERNEL); + device->dev = pdev; + device->iova = iova; + list_add_tail(&device->list, &msi_data->device_list); + } else { + list_for_each_entry(device, &msi_data->device_list, list) { + if (device->dev == pdev) { + list_del(&device->list); + kfree(device); + mutex_unlock(&msi_data->lock); + return 0; + } + } + } + + mutex_unlock(&msi_data->lock); + break; + } + return 0; +} + static int fsl_msi_check_device(struct pci_dev *pdev, int nvec, int type) { if (type == PCI_CAP_ID_MSIX) @@ -167,6 +207,7 @@ static void fsl_compose_msi_msg(struct pci_dev *pdev, int hwirq, struct msi_msg *msg, struct fsl_msi *fsl_msi_data) { + struct fsl_msi_device *device; struct fsl_msi *msi_data = fsl_msi_data; struct pci_controller *hose = pci_bus_to_host(pdev->bus); u64 address; /* Physical address of the MSIIR */ @@ -181,6 +222,18 @@ static void fsl_compose_msi_msg(struct pci_dev *pdev, int hwirq, address = fsl_pci_immrbar_base(hose) + (msi_data->msiir & 0xfffff); + printk("%s address = %llx\n", __func__, address); + + mutex_lock(&msi_data->lock); + list_for_each_entry(device, &msi_data->device_list, list) { + if (device->dev == pdev) { + address = device->iova | (msi_data->msiir & 0xfff); + break; + } + } + mutex_unlock(&msi_data->lock); + printk("%s address = %llx\n", __func__, address); + msg->address_lo = lower_32_bits(address); msg->address_hi = upper_32_bits(address); @@ -356,6 +409,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev) struct fsl_msi *msi = platform_get_drvdata(ofdev); int virq, i; struct fsl_msi_cascade_data *cascade_data; + struct fsl_msi_device *device; if (msi->list.prev != NULL) list_del(&msi->list); @@ -371,6 +425,13 @@ static int fsl_of_msi_remove(struct platform_device *ofdev) msi_bitmap_free(&msi->bitmap); if ((msi->feature & 
FSL_PIC_IP_MASK) != FSL_PIC_IP_VMPIC) iounmap(msi->msi_regs); + + mutex_lock(&msi->lock); + list_for_each_entry(device, &msi->device_list, list) { + list_del(&device->list); + kfree(device); + } + mutex_unlock(&msi->lock); kfree(msi); return 0; @@ -436,6 +497,8 @@ static int fsl_of_msi_probe(struct platform_device *dev) dev_err(&dev->dev, "No memory for MSI structure\n"); return -ENOMEM; } + INIT_LIST_HEAD(&msi->device_list); + mutex_init(&msi->lock); platform_set_drvdata(dev, msi); msi->irqhost = irq_domain_add_linear(dev->dev.of_node, @@ -558,6 +621,7 @@ static int fsl_of_msi_probe(struct platform_device *dev) ppc_md.msi_check_device = fsl_msi_check_device; ppc_md.msi_get_region_count = fsl_msi_get_region_count; ppc_md.msi_get_region = fsl_msi_get_region; + ppc_md.msi_set_iova = fsl_msi_set_iova; } else if (ppc_md.setup_msi_irqs != fsl_setup_msi_irqs) { dev_err(&dev->dev, "Different MSI driver already installed!\n"); err = -ENODEV; diff --git a/arch/powerpc/sysdev/fsl_msi.h b/arch/powerpc/sysdev/fsl_msi.h index a2cc5a2..adda5c3 100644 --- a/arch/powerpc/sysdev/fsl_msi.h +++ b/arch/powerpc/sysdev/fsl_msi.h @@ -27,9 +27,15 @@ #define FSL_PIC_IP_IPIC 0x00000002 #define FSL_PIC_IP_VMPIC 0x00000003 +struct fsl_msi_device { + struct list_head list; + struct pci_dev *dev; + dma_addr_t iova; +}; + struct fsl_msi { struct irq_domain *irqhost; - + struct mutex lock; unsigned long cascade_irq; phys_addr_t msiir; /* MSIIR Address in CCSR */ u32 ibs_shift; /* Shift of interrupt bit select */ @@ -37,7 +43,7 @@ struct fsl_msi { void __iomem *msi_regs; u32 feature; int msi_virqs[NR_MSI_REG_MAX]; - + struct list_head device_list; /* * During probe each bank is assigned a index number. * index number start from 0. 
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 2643a29..59ec465 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -77,6 +77,18 @@ int __weak arch_msi_get_region(int region_num, struct msi_region *region) return 0; } +int __weak arch_msi_set_iova(struct pci_dev *pdev, int region_num, + dma_addr_t iova, bool set) +{ + return 0; +} + +int msi_set_iova(struct pci_dev *pdev, int region_num, dma_addr_t iova, bool set) +{ + return arch_msi_set_iova(pdev, region_num, iova, set); +} +EXPORT_SYMBOL(msi_set_iova); + int msi_get_region_count(void) { return arch_msi_get_region_count(); diff --git a/include/linux/pci.h b/include/linux/pci.h index c587034..c6d3e58 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1195,6 +1195,12 @@ static inline int msi_get_region(int region_num, struct msi_region *region) { return 0; } + +static inline int msi_set_iova(struct pci_dev *pdev, int region_num, + dma_addr_t iova, bool set) +{ + return 0; +} #else int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec); int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec); @@ -1209,6 +1215,8 @@ void pci_restore_msi_state(struct pci_dev *dev); int pci_msi_enabled(void); int msi_get_region_count(void); int msi_get_region(int region_num, struct msi_region *region); +int msi_set_iova(struct pci_dev *pdev, int region_num, + dma_addr_t iova, bool set); #endif #ifdef CONFIG_PCIEPORTBUS -- 1.7.0.4 -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html