[PATCH RFC/RFT] vfio/pci: Create feature to disable MSI virtualization

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



vfio-pci has always virtualized the MSI address and data registers as
MSI programming is performed through the SET_IRQS ioctl.  Often this
virtualization is not used, and in specific cases can be unhelpful.

One such case where the virtualization is a hinderance is when the
device contains an onboard interrupt controller programmed by the guest
driver.  Userspace VMMs have a chance to quirk this programming,
injecting the host physical MSI information, but only if the userspace
driver can get access to the host physical address and data registers.

This introduces a device feature which allows the userspace driver to
disable virtualization of the MSI capability address and data registers
in order to provide read-only access the the physical values.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=216055
Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
---
 drivers/vfio/pci/vfio_pci_config.c | 26 ++++++++++++++++++++++++++
 drivers/vfio/pci/vfio_pci_core.c   | 21 +++++++++++++++++++++
 drivers/vfio/pci/vfio_pci_priv.h   |  1 +
 include/uapi/linux/vfio.h          | 14 ++++++++++++++
 4 files changed, 62 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 97422aafaa7b..5f86e75ea6ca 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -1259,6 +1259,32 @@ static int vfio_msi_cap_len(struct vfio_pci_core_device *vdev, u8 pos)
 	return len;
 }
 
+/* Disable virtualization of the MSI address and data fields */
+int vfio_pci_msi_novirt(struct vfio_pci_core_device *vdev)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	struct perm_bits *perm = vdev->msi_perm;
+	u16 flags;
+	int ret;
+
+	if (!perm)
+		return -EINVAL;
+
+	ret = pci_read_config_word(pdev, pdev->msi_cap + PCI_MSI_FLAGS, &flags);
+	if (ret)
+		return pcibios_err_to_errno(ret);
+
+	p_setd(perm, PCI_MSI_ADDRESS_LO, NO_VIRT, NO_WRITE);
+	if (flags & PCI_MSI_FLAGS_64BIT) {
+		p_setd(perm, PCI_MSI_ADDRESS_HI, NO_VIRT, NO_WRITE);
+		p_setw(perm, PCI_MSI_DATA_64, (u16)NO_VIRT, (u16)NO_WRITE);
+	} else {
+		p_setw(perm, PCI_MSI_DATA_32, (u16)NO_VIRT, (u16)NO_WRITE);
+	}
+
+	return 0;
+}
+
 /* Determine extended capability length for VC (2 & 9) and MFVC */
 static int vfio_vc_cap_len(struct vfio_pci_core_device *vdev, u16 pos)
 {
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index ba0ce0075b2f..acdced212be2 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1518,6 +1518,24 @@ static int vfio_pci_core_feature_token(struct vfio_device *device, u32 flags,
 	return 0;
 }
 
+static int vfio_pci_core_feature_msi_novirt(struct vfio_device *device,
+					    u32 flags, void __user *arg,
+					    size_t argsz)
+{
+	struct vfio_pci_core_device *vdev =
+		container_of(device, struct vfio_pci_core_device, vdev);
+	int ret;
+
+	if (!vdev->msi_perm)
+		return -ENOTTY;
+
+	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, 0);
+	if (ret != 1)
+		return ret;
+
+	return vfio_pci_msi_novirt(vdev);
+}
+
 int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
 				void __user *arg, size_t argsz)
 {
@@ -1531,6 +1549,9 @@ int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
 		return vfio_pci_core_pm_exit(device, flags, arg, argsz);
 	case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
 		return vfio_pci_core_feature_token(device, flags, arg, argsz);
+	case VFIO_DEVICE_FEATURE_PCI_MSI_NOVIRT:
+		return vfio_pci_core_feature_msi_novirt(device, flags,
+							arg, argsz);
 	default:
 		return -ENOTTY;
 	}
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
index 5e4fa69aee16..6e6cc74c6579 100644
--- a/drivers/vfio/pci/vfio_pci_priv.h
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -53,6 +53,7 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
 
 int vfio_pci_init_perm_bits(void);
 void vfio_pci_uninit_perm_bits(void);
+int vfio_pci_msi_novirt(struct vfio_pci_core_device *vdev);
 
 int vfio_config_init(struct vfio_pci_core_device *vdev);
 void vfio_config_free(struct vfio_pci_core_device *vdev);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 2b68e6cdf190..ddf5dd9245fb 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1458,6 +1458,20 @@ struct vfio_device_feature_bus_master {
 };
 #define VFIO_DEVICE_FEATURE_BUS_MASTER 10
 
+/**
+ * Toggle virtualization of PCI MSI address and data fields off.  By default
+ * vfio-pci-core based drivers virtualize the MSI address and data fields of
+ * the MSI capability to emulate direct access to the device, ie. writes are
+ * allowed and buffered where subsequent reads return the buffered data.
+ * VMMs often virtualize these registers anyway and there are cases in user-
+ * space where having access to the host MSI fields can be useful, such as
+ * quirking an embedded interrupt controller on the device to generate physical
+ * MSI interrupts.  Upon VFIO_DEVICE_FEATURE_SET of the PCI_MSI_NOVIRT feature
+ * this virtualization is disabled, reads of the MSI address and data fields
+ * will return the physical values and writes are dropped.
+ */
+#define VFIO_DEVICE_FEATURE_PCI_MSI_NOVIRT 11
+
 /* -------- API for Type1 VFIO IOMMU -------- */
 
 /**
-- 
2.45.2





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux