[PATCH kvmtool 08/10] vfio-pci: add MSI support

When a device has the MSI capability but not MSI-X, use MSI.

This patch is untested. Consider it broken unless proven otherwise.
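
For reference, the vector counts used below are derived from the MSI Message
Control register: Multiple Message Capable (bits 3:1) is the number of vectors
the device supports, Multiple Message Enable (bits 6:4) is the number the guest
actually enabled. A minimal sketch of that decoding, using the PCI_MSI_FLAGS_*
definitions from linux/pci_regs.h (the helper names are illustrative and not
part of this patch):

	#include <stdint.h>
	#include <linux/pci_regs.h>

	/* Multiple Message Capable: number of vectors the device supports */
	static inline unsigned int msi_nr_vectors_supported(uint16_t ctrl)
	{
		return 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1);
	}

	/* Multiple Message Enable: number of vectors the guest enabled */
	static inline unsigned int msi_nr_vectors_enabled(uint16_t ctrl)
	{
		return 1 << ((ctrl & PCI_MSI_FLAGS_QSIZE) >> 4);
	}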

Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@xxxxxxx>
---
 include/kvm/pci.h  |  23 +++++++++
 include/kvm/vfio.h |   9 ++++
 vfio.c             | 144 +++++++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 161 insertions(+), 15 deletions(-)
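
A note for reviewers on the magic numbers in vfio_pci_msi_cap_size() below:
per the PCI spec, the MSI capability is 10 bytes in its basic 32-bit form,
gains 4 bytes for a 64-bit message address, and gains another 10 bytes (mask
and pending registers plus the 2 bytes of padding after the 16-bit data field)
when per-vector masking is implemented. A standalone sketch of the same
arithmetic, illustrative only and not part of the patch:

	#include <stddef.h>
	#include <stdint.h>
	#include <linux/pci_regs.h>

	static size_t msi_cap_size(uint16_t ctrl)
	{
		/* cap id + next + ctrl + address_lo + data = 10 bytes */
		size_t size = 10;

		if (ctrl & PCI_MSI_FLAGS_64BIT)		/* address_hi dword */
			size += 4;

		if (ctrl & PCI_MSI_FLAGS_MASKBIT)	/* mask + pending dwords + padding */
			size += 10;

		return size;	/* 10, 14, 20 or 24 bytes */
	}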

diff --git a/include/kvm/pci.h b/include/kvm/pci.h
index 44e5adff..c5fc8254 100644
--- a/include/kvm/pci.h
+++ b/include/kvm/pci.h
@@ -59,6 +59,29 @@ struct msix_cap {
 	u32 pba_offset;
 };
 
+struct msi_cap_64 {
+	u8 cap;
+	u8 next;
+	u16 ctrl;
+	u32 address_lo;
+	u32 address_hi;
+	u16 data;
+	u16 _align;
+	u32 mask_bits;
+	u32 pend_bits;
+};
+
+struct msi_cap_32 {
+	u8 cap;
+	u8 next;
+	u16 ctrl;
+	u32 address_lo;
+	u16 data;
+	u16 _align;
+	u32 mask_bits;
+	u32 pend_bits;
+};
+
 struct pci_cap_hdr {
 	u8	type;
 	u8	next;
diff --git a/include/kvm/vfio.h b/include/kvm/vfio.h
index 68535963..71dfa8f7 100644
--- a/include/kvm/vfio.h
+++ b/include/kvm/vfio.h
@@ -29,9 +29,18 @@ struct vfio_pci_msix_pba {
 	u32				guest_phys_addr;
 };
 
+struct vfio_pci_msi {
+	struct msi_cap_64		*cap;
+	size_t				nr_entries;
+	struct vfio_pci_msix_entry	*entries;
+	off_t				pos;
+	size_t				size;
+};
+
 struct vfio_pci_device {
 	struct pci_device_header	hdr;
 
+	struct vfio_pci_msi		msi;
 	struct vfio_pci_msix_table	msix_table;
 	struct vfio_pci_msix_pba	msix_pba;
 };
diff --git a/vfio.c b/vfio.c
index 85d1ea8b..0f6c2f24 100644
--- a/vfio.c
+++ b/vfio.c
@@ -114,6 +114,60 @@ static void vfio_pci_msix_table_access(struct kvm_cpu *vcpu, u64 addr, u8 *data,
 	irq__update_msix_route(kvm, entry->gsi, &entry->config.msg);
 }
 
+static void vfio_pci_msi_write(struct kvm *kvm, struct vfio_device *device,
+			       u8 off, u8 *data, u32 len)
+{
+	int ret;
+	size_t i, nr_vectors;
+	struct msi_msg msi;
+	struct vfio_pci_msix_entry *entry;
+	struct vfio_pci_device *pdev = &device->pci;
+	struct msi_cap_64 *msi_cap_64 = (void *)&pdev->hdr + pdev->msi.pos;
+
+	/* Only modify routes when guest sets the enable bit */
+	if (off < PCI_MSI_FLAGS || off >= PCI_MSI_ADDRESS_LO)
+		return;
+
+	if (!(msi_cap_64->ctrl & PCI_MSI_FLAGS_ENABLE))
+		return;
+
+	/* Create routes for the requested vectors */
+	nr_vectors = 1 << ((msi_cap_64->ctrl & PCI_MSI_FLAGS_QSIZE) >> 4);
+
+	msi.address_lo = msi_cap_64->address_lo;
+	if (msi_cap_64->ctrl & PCI_MSI_FLAGS_64BIT) {
+		msi.address_hi = msi_cap_64->address_hi;
+		msi.data = msi_cap_64->data;
+	} else {
+		struct msi_cap_32 *msi_cap_32 = (void *)msi_cap_64;
+		msi.address_hi = 0;
+		msi.data = msi_cap_32->data;
+	}
+
+	for (i = 0; i < nr_vectors; i++) {
+		u32 devid = device->dev_hdr.dev_num << 3;
+
+		entry = &pdev->msi.entries[i];
+		if (entry->gsi == -1) {
+			ret = irq__add_msix_route(kvm, &msi, devid);
+			if (ret < 0) {
+				pr_err("cannot create MSI-X route");
+				break;
+			}
+
+			entry->gsi = ret;
+
+			ret = irq__add_irqfd(kvm, ret, entry->eventfd, -1);
+			if (ret < 0) {
+				pr_err("Cannot setup irqfd");
+				break;
+			}
+		} else {
+			irq__update_msix_route(kvm, entry->gsi, &msi);
+		}
+	}
+}
+
 static void vfio_pci_cfg_read(struct kvm *kvm, struct pci_device_header *pci_hdr,
 			      u8 offset, void *data, int sz)
 {
@@ -151,6 +205,24 @@ static void vfio_pci_cfg_write(struct kvm *kvm, struct pci_device_header *pci_hd
 	if (pread(device->fd, base + offset, sz, info->offset + offset) != sz)
 		pr_warning("Failed to read %d bytes from Configuration Space at 0x%x",
 			   sz, offset);
+
+	if (pdev->msi.cap && offset >= pdev->msi.pos && offset <
+	    pdev->msi.pos + pdev->msi.size) {
+		vfio_pci_msi_write(kvm, device, offset - pdev->msi.pos, data,
+				   sz);
+	}
+}
+
+static ssize_t vfio_pci_msi_cap_size(struct msi_cap_64 *cap_hdr)
+{
+	size_t size = 10;
+
+	if (cap_hdr->ctrl & PCI_MSI_FLAGS_64BIT)
+		size += 4;
+	if (cap_hdr->ctrl & PCI_MSI_FLAGS_MASKBIT)
+		size += 10;
+
+	return size;
 }
 
 static ssize_t vfio_pci_cap_size(struct pci_cap_hdr *cap_hdr)
@@ -158,6 +230,8 @@ static ssize_t vfio_pci_cap_size(struct pci_cap_hdr *cap_hdr)
 	switch (cap_hdr->type) {
 	case PCI_CAP_ID_MSIX:
 		return PCI_CAP_MSIX_SIZEOF;
+	case PCI_CAP_ID_MSI:
+		return vfio_pci_msi_cap_size((void *)cap_hdr);
 	default:
 		pr_err("unknown PCI capability %u", cap_hdr->type);
 		return 0;
@@ -231,9 +305,10 @@ static int vfio_pci_parse_caps(struct vfio_device *device)
 
 		switch (cap.type) {
 		case PCI_CAP_ID_MSIX:
+		case PCI_CAP_ID_MSI:
 			ret = vfio_pci_add_cap(device, &cap, info->offset, pos);
 			if (ret) {
-				pr_warning("Failed to read MSI-X capability structure");
+				pr_warning("Failed to read capability structure");
 				return ret;
 			}
 			break;
@@ -493,11 +568,30 @@ out_free:
 	return ret;
 }
 
+static int vfio_pci_init_msi(struct kvm *kvm, struct vfio_pci_device *pdev,
+			     struct msi_cap_64 *cap)
+{
+	pdev->msi = (struct vfio_pci_msi) {
+		.nr_entries = 1 << ((cap->ctrl & PCI_MSI_FLAGS_QMASK) >> 1),
+		.cap = cap,
+		.pos = (void *)cap - (void *)&pdev->hdr,
+		.size = vfio_pci_cap_size((void *)cap),
+	};
+
+	pdev->msi.entries = calloc(pdev->msi.nr_entries,
+				   sizeof(struct vfio_pci_msix_entry));
+	if (!pdev->msi.entries)
+		return -ENOMEM;
+
+	return 0;
+}
+
 static int vfio_pci_configure_dev_regions(struct kvm *kvm,
 					  struct vfio_device *device)
 {
 	int ret;
 	struct msix_cap *msix;
+	struct msi_cap_64 *msi;
 	struct vfio_pci_device *pdev = &device->pci;
 	u32 i, num_regions = device->info.num_regions;
 
@@ -512,6 +606,13 @@ static int vfio_pci_configure_dev_regions(struct kvm *kvm,
 			return ret;
 	}
 
+	msi = pci_find_cap(&pdev->hdr, PCI_CAP_ID_MSI);
+	if (msi) {
+		ret = vfio_pci_init_msi(kvm, pdev, msi);
+		if (ret)
+			return ret;
+	}
+
 	/* First of all, map the BARs directly into the guest */
 	for (i = VFIO_PCI_BAR0_REGION_INDEX; i <= VFIO_PCI_BAR5_REGION_INDEX; ++i) {
 		struct vfio_region *region;
@@ -576,30 +677,28 @@ static int vfio_configure_dev_regions(struct kvm *kvm,
 	return vfio_pci_configure_dev_regions(kvm, device);
 }
 
-static int vfio_pci_init_msix_irqfd(struct kvm *kvm,
-				    struct vfio_device *device)
+static int vfio_pci_init_msi_irqfd(int devfd, size_t index, size_t nr_entries,
+				   struct vfio_pci_msix_entry *entries)
 {
 	int ret;
 	size_t i;
 	int *eventfds;
 	size_t irq_set_size;
 	struct vfio_irq_set *irq_set;
-	struct vfio_pci_msix_table *table = &device->pci.msix_table;
 
 	/*
-	 * We likely have VFIO_IRQ_INFO_NORESIZE for MSI-X, and we don't want to
+	 * We likely have VFIO_IRQ_INFO_NORESIZE for MSI/X, and we don't want to
 	 * enable/disable MSIs every time the guest requests a new one. Setup
 	 * IRQFD for all vectors upfront.
 	 *
-	 * We cannot start creating the MSI-X routes in KVM just now. First we
+	 * We cannot start creating the MSI routes in KVM just now. First we
 	 * need to wait for all devices to allocate their IRQ lines, and only
 	 * after that number is freezed will we be able to allocate MSI numbers.
 	 * A bit unfortunate (it would be much easier to handle initialization
 	 * errors here), but okay. Store eventfd until we're ready to create the
 	 * routes.
 	 */
-	irq_set_size = sizeof(struct vfio_irq_set) +
-		       table->nr_entries * sizeof(int);
+	irq_set_size = sizeof(struct vfio_irq_set) + nr_entries * sizeof(int);
 	irq_set = malloc(irq_set_size);
 	if (!irq_set)
 		return -ENOMEM;
@@ -607,13 +706,13 @@ static int vfio_pci_init_msix_irqfd(struct kvm *kvm,
 	*irq_set = (struct vfio_irq_set) {
 		.argsz	= irq_set_size,
 		.flags 	= VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
-		.index 	= VFIO_PCI_MSIX_IRQ_INDEX,
+		.index 	= index,
 		.start 	= 0,
-		.count 	= table->nr_entries,
+		.count 	= nr_entries,
 	};
 	eventfds = (void *)irq_set + sizeof(struct vfio_irq_set);
 
-	for (i = 0; i < table->nr_entries; i++) {
+	for (i = 0; i < nr_entries; i++) {
 		eventfds[i] = eventfd(0, 0);
 		if (eventfds[i] < 0) {
 			pr_err("cannot create eventfd (try to increase RLIMIT_NOFILE)");
@@ -621,11 +720,11 @@ static int vfio_pci_init_msix_irqfd(struct kvm *kvm,
 			goto out_free;
 		}
 
-		table->entries[i].gsi = -1;
-		table->entries[i].eventfd = eventfds[i];
+		entries[i].gsi = -1;
+		entries[i].eventfd = eventfds[i];
 	}
 
-	ret = ioctl(device->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+	ret = ioctl(devfd, VFIO_DEVICE_SET_IRQS, irq_set);
 	if (ret < 0)
 		pr_err("Cannot register vfio_irq_set");
 
@@ -714,10 +813,12 @@ static int vfio_configure_dev_irqs(struct kvm *kvm, struct vfio_device *device)
 	int ret;
 	struct vfio_pci_device *pdev = &device->pci;
 	struct msix_cap *msix = pci_find_cap(&pdev->hdr, PCI_CAP_ID_MSIX);
+	struct msi_cap_64 *msi = pci_find_cap(&pdev->hdr, PCI_CAP_ID_MSI);
 
 	device->irq_info = (struct vfio_irq_info) {
 		.argsz = sizeof(device->irq_info),
 		.index = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
+			 msi  ? VFIO_PCI_MSI_IRQ_INDEX :
 			        VFIO_PCI_INTX_IRQ_INDEX,
 	};
 
@@ -738,7 +839,20 @@ static int vfio_configure_dev_irqs(struct kvm *kvm, struct vfio_device *device)
 			return -EINVAL;
 		}
 
-		ret = vfio_pci_init_msix_irqfd(kvm, device);
+		ret = vfio_pci_init_msi_irqfd(device->fd,
+					      VFIO_PCI_MSIX_IRQ_INDEX,
+					      pdev->msix_table.nr_entries,
+					      pdev->msix_table.entries);
+	} else if (msi) {
+		if (device->irq_info.count != pdev->msi.nr_entries) {
+			pr_err("Invalid number of MSIs reported by VFIO");
+			return -EINVAL;
+		}
+
+		ret = vfio_pci_init_msi_irqfd(device->fd,
+					      VFIO_PCI_MSI_IRQ_INDEX,
+					      pdev->msi.nr_entries,
+					      pdev->msi.entries);
 	} else {
 		int gsi = pdev->hdr.irq_line - KVM_IRQ_OFFSET;
 
-- 
2.12.1



