vfio-pci has always virtualized the MSI address and data registers as MSI programming is performed through the SET_IRQS ioctl. Often this virtualization is not used, and in specific cases can be unhelpful. One such case where the virtualization is a hindrance is when the device contains an onboard interrupt controller programmed by the guest driver. Userspace VMMs have a chance to quirk this programming, injecting the host physical MSI information, but only if the userspace driver can get access to the host physical address and data registers. This introduces a device feature which allows the userspace driver to disable virtualization of the MSI capability address and data registers in order to provide read-only access to the physical values. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216055 Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx> --- drivers/vfio/pci/vfio_pci_config.c | 26 ++++++++++++++++++++++++++ drivers/vfio/pci/vfio_pci_core.c | 21 +++++++++++++++++++++ drivers/vfio/pci/vfio_pci_priv.h | 1 + include/uapi/linux/vfio.h | 14 ++++++++++++++ 4 files changed, 62 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 97422aafaa7b..5f86e75ea6ca 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1259,6 +1259,32 @@ static int vfio_msi_cap_len(struct vfio_pci_core_device *vdev, u8 pos) return len; } +/* Disable virtualization of the MSI address and data fields */ +int vfio_pci_msi_novirt(struct vfio_pci_core_device *vdev) +{ + struct pci_dev *pdev = vdev->pdev; + struct perm_bits *perm = vdev->msi_perm; + u16 flags; + int ret; + + if (!perm) + return -EINVAL; + + ret = pci_read_config_word(pdev, pdev->msi_cap + PCI_MSI_FLAGS, &flags); + if (ret) + return pcibios_err_to_errno(ret); + + p_setd(perm, PCI_MSI_ADDRESS_LO, NO_VIRT, NO_WRITE); + if (flags & PCI_MSI_FLAGS_64BIT) { + p_setd(perm, PCI_MSI_ADDRESS_HI, NO_VIRT, NO_WRITE); + p_setw(perm, 
PCI_MSI_DATA_64, (u16)NO_VIRT, (u16)NO_WRITE); + } else { + p_setw(perm, PCI_MSI_DATA_32, (u16)NO_VIRT, (u16)NO_WRITE); + } + + return 0; +} + /* Determine extended capability length for VC (2 & 9) and MFVC */ static int vfio_vc_cap_len(struct vfio_pci_core_device *vdev, u16 pos) { diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index ba0ce0075b2f..acdced212be2 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1518,6 +1518,24 @@ static int vfio_pci_core_feature_token(struct vfio_device *device, u32 flags, return 0; } +static int vfio_pci_core_feature_msi_novirt(struct vfio_device *device, + u32 flags, void __user *arg, + size_t argsz) +{ + struct vfio_pci_core_device *vdev = + container_of(device, struct vfio_pci_core_device, vdev); + int ret; + + if (!vdev->msi_perm) + return -ENOTTY; + + ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, 0); + if (ret != 1) + return ret; + + return vfio_pci_msi_novirt(vdev); +} + int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz) { @@ -1531,6 +1549,9 @@ int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags, return vfio_pci_core_pm_exit(device, flags, arg, argsz); case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN: return vfio_pci_core_feature_token(device, flags, arg, argsz); + case VFIO_DEVICE_FEATURE_PCI_MSI_NOVIRT: + return vfio_pci_core_feature_msi_novirt(device, flags, + arg, argsz); default: return -ENOTTY; } diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h index 5e4fa69aee16..6e6cc74c6579 100644 --- a/drivers/vfio/pci/vfio_pci_priv.h +++ b/drivers/vfio/pci/vfio_pci_priv.h @@ -53,6 +53,7 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, int vfio_pci_init_perm_bits(void); void vfio_pci_uninit_perm_bits(void); +int vfio_pci_msi_novirt(struct vfio_pci_core_device *vdev); int vfio_config_init(struct vfio_pci_core_device *vdev); void 
vfio_config_free(struct vfio_pci_core_device *vdev); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 2b68e6cdf190..ddf5dd9245fb 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1458,6 +1458,20 @@ struct vfio_device_feature_bus_master { }; #define VFIO_DEVICE_FEATURE_BUS_MASTER 10 +/** + * Toggle virtualization of PCI MSI address and data fields off. By default + * vfio-pci-core based drivers virtualize the MSI address and data fields of + * the MSI capability to emulate direct access to the device, ie. writes are + * allowed and buffered where subsequent reads return the buffered data. + * VMMs often virtualize these registers anyway and there are cases in user- + * space where having access to the host MSI fields can be useful, such as + * quirking an embedded interrupt controller on the device to generate physical + * MSI interrupts. Upon VFIO_DEVICE_FEATURE_SET of the PCI_MSI_NOVIRT feature + * this virtualization is disabled, reads of the MSI address and data fields + * will return the physical values and writes are dropped. + */ +#define VFIO_DEVICE_FEATURE_PCI_MSI_NOVIRT 11 + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- 2.45.2