By default VFIO disables mapping of MSIX BAR to the userspace as the userspace may program it in a way allowing spurious interrupts; instead the userspace uses the VFIO_DEVICE_SET_IRQS ioctl. This works fine as long as the system page size equals to the MSIX alignment requirement which is 4KB. However with a bigger page size the existing code prohibits mapping non-MSIX parts of a page with MSIX structures so these parts have to be emulated via slow reads/writes on a VFIO device fd. If these emulated bits are accessed often, this has serious impact on performance. This adds an ioctl to the vfio-pci device which hides the sparse capability and allows the userspace to map a BAR with MSIX structures. Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxxxx> --- drivers/vfio/pci/vfio_pci_private.h | 1 + include/uapi/linux/vfio.h | 15 +++++++++++++++ drivers/vfio/pci/vfio_pci.c | 19 +++++++++++++++++-- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index f561ac1..058fa9b 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -90,6 +90,7 @@ struct vfio_pci_device { bool has_vga; bool needs_reset; bool nointx; + bool msix_mmap; struct pci_saved_state *pci_saved_state; int refcnt; struct eventfd_ctx *err_trigger; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index ae46105..60cd4fd 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -502,6 +502,21 @@ struct vfio_pci_hot_reset { #define VFIO_DEVICE_PCI_HOT_RESET _IO(VFIO_TYPE, VFIO_BASE + 13) +/** + * VFIO_DEVICE_PCI_ALLOW_MSIX_MMAP - _IOW(VFIO_TYPE, VFIO_BASE + 14, + * struct vfio_pci_allow_msix_mmap) + * + * Return: 0 on success, -errno on failure. + */ +#define VFIO_PCI_ALLOW_MSIX_MMAP (1 << 0) /* mmap of entire MSIX BAR */ + +struct vfio_pci_allow_msix_mmap { + __u32 argsz; + __u32 flags; +}; + +#define VFIO_DEVICE_PCI_ALLOW_MSIX_MMAP _IO(VFIO_TYPE, VFIO_BASE + 14) + /* -------- API for Type1 VFIO IOMMU -------- */ /** diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index f041b1a..adba5ab 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -691,7 +691,8 @@ static long vfio_pci_ioctl(void *device_data, VFIO_REGION_INFO_FLAG_WRITE; if (vdev->bar_mmap_supported[info.index]) { info.flags |= VFIO_REGION_INFO_FLAG_MMAP; - if (info.index == vdev->msix_bar) { + if (info.index == vdev->msix_bar && + !vdev->msix_mmap) { ret = msix_sparse_mmap_cap(vdev, &caps); if (ret) return ret; @@ -1039,6 +1040,20 @@ static long vfio_pci_ioctl(void *device_data, kfree(groups); return ret; + } else if (cmd == VFIO_DEVICE_PCI_ALLOW_MSIX_MMAP) { + struct vfio_pci_allow_msix_mmap hdr; + + minsz = offsetofend(struct vfio_pci_allow_msix_mmap, flags); + + if (copy_from_user(&hdr, (void __user *)arg, minsz)) + return -EFAULT; + + if (hdr.argsz < minsz || hdr.flags & ~VFIO_PCI_ALLOW_MSIX_MMAP) + return -EINVAL; + + vdev->msix_mmap = !!(hdr.flags & VFIO_PCI_ALLOW_MSIX_MMAP); + + return 0; } return -ENOTTY; @@ -1122,7 +1137,7 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) if (req_start + req_len > phys_len) return -EINVAL; - if (index == vdev->msix_bar) { + if (!vdev->msix_mmap && index == vdev->msix_bar) { /* * Disallow mmaps overlapping the MSI-X table; users don't * get to touch this directly. We could find somewhere -- 2.11.0