When a device has MSI capability but not MSI-X, use it.

This patch is untested. Consider it broken unless proven otherwise.

Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@xxxxxxx>
---
Review changes to the original posting:
 - clamp the number of enabled vectors to the number of allocated entries
 - encode the vector number in the low-order bits of Message Data
 - report "MSI" rather than "MSI-X" when route creation fails
 - document the new MSI helpers
 - the post-image blob id on the vfio.c index line is left from the original
   posting and no longer matches

 include/kvm/pci.h  |  23 ++++++++
 include/kvm/vfio.h |   9 ++++
 vfio.c             | 179 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 196 insertions(+), 15 deletions(-)

diff --git a/include/kvm/pci.h b/include/kvm/pci.h
index 44e5adff..c5fc8254 100644
--- a/include/kvm/pci.h
+++ b/include/kvm/pci.h
@@ -59,6 +59,29 @@ struct msix_cap {
 	u32 pba_offset;
 };
 
+struct msi_cap_64 {
+	u8 cap;
+	u8 next;
+	u16 ctrl;
+	u32 address_lo;
+	u32 address_hi;
+	u16 data;
+	u16 _align;
+	u32 mask_bits;
+	u32 pend_bits;
+};
+
+struct msi_cap_32 {
+	u8 cap;
+	u8 next;
+	u16 ctrl;
+	u32 address_lo;
+	u16 data;
+	u16 _align;
+	u32 mask_bits;
+	u32 pend_bits;
+};
+
 struct pci_cap_hdr {
 	u8 type;
 	u8 next;
diff --git a/include/kvm/vfio.h b/include/kvm/vfio.h
index 68535963..71dfa8f7 100644
--- a/include/kvm/vfio.h
+++ b/include/kvm/vfio.h
@@ -29,9 +29,18 @@ struct vfio_pci_msix_pba {
 	u32 guest_phys_addr;
 };
 
+struct vfio_pci_msi {
+	struct msi_cap_64 *cap;
+	size_t nr_entries;
+	struct vfio_pci_msix_entry *entries;
+	off_t pos;
+	size_t size;
+};
+
 struct vfio_pci_device {
 	struct pci_device_header hdr;
 
+	struct vfio_pci_msi msi;
 	struct vfio_pci_msix_table msix_table;
 	struct vfio_pci_msix_pba msix_pba;
 };
diff --git a/vfio.c b/vfio.c
index 85d1ea8b..0f6c2f24 100644
--- a/vfio.c
+++ b/vfio.c
@@ -114,6 +114,81 @@ static void vfio_pci_msix_table_access(struct kvm_cpu *vcpu, u64 addr, u8 *data,
 	irq__update_msix_route(kvm, entry->gsi, &entry->config.msg);
 }
 
+/*
+ * Handle a guest write to the MSI capability. When the write touches Message
+ * Control and leaves the enable bit set, create or update one KVM MSI route
+ * per enabled vector, each bound to the eventfd registered with VFIO.
+ *
+ * @off is the offset of the write within the capability. @data and @len are
+ * not used: the capability is read back from the shadow copy in pdev->hdr.
+ */
+static void vfio_pci_msi_write(struct kvm *kvm, struct vfio_device *device,
+			       u8 off, u8 *data, u32 len)
+{
+	int ret;
+	size_t i, nr_vectors;
+	struct msi_msg msi;
+	struct vfio_pci_msix_entry *entry;
+	struct vfio_pci_device *pdev = &device->pci;
+	struct msi_cap_64 *msi_cap_64 = (void *)&pdev->hdr + pdev->msi.pos;
+
+	/* Only modify routes when guest sets the enable bit */
+	if (off < PCI_MSI_FLAGS || off >= PCI_MSI_ADDRESS_LO)
+		return;
+
+	if (!(msi_cap_64->ctrl & PCI_MSI_FLAGS_ENABLE))
+		return;
+
+	/*
+	 * Create routes for the requested vectors. Multiple Message Enable is
+	 * guest-controlled, so never index past the entries (and eventfds)
+	 * that were allocated from Multiple Message Capable.
+	 */
+	nr_vectors = 1 << ((msi_cap_64->ctrl & PCI_MSI_FLAGS_QSIZE) >> 4);
+	if (nr_vectors > pdev->msi.nr_entries)
+		nr_vectors = pdev->msi.nr_entries;
+
+	msi.address_lo = msi_cap_64->address_lo;
+	if (msi_cap_64->ctrl & PCI_MSI_FLAGS_64BIT) {
+		msi.address_hi = msi_cap_64->address_hi;
+		msi.data = msi_cap_64->data;
+	} else {
+		struct msi_cap_32 *msi_cap_32 = (void *)msi_cap_64;
+		msi.address_hi = 0;
+		msi.data = msi_cap_32->data;
+	}
+
+	for (i = 0; i < nr_vectors; i++) {
+		u32 devid = device->dev_hdr.dev_num << 3;
+
+		/*
+		 * With several vectors enabled, the function replaces the
+		 * low-order bits of Message Data with the vector number.
+		 */
+		msi.data &= ~(nr_vectors - 1);
+		msi.data |= i;
+
+		entry = &pdev->msi.entries[i];
+		if (entry->gsi == -1) {
+			ret = irq__add_msix_route(kvm, &msi, devid);
+			if (ret < 0) {
+				pr_err("cannot create MSI route");
+				break;
+			}
+
+			entry->gsi = ret;
+
+			ret = irq__add_irqfd(kvm, ret, entry->eventfd, -1);
+			if (ret < 0) {
+				pr_err("Cannot setup irqfd");
+				break;
+			}
+		} else {
+			irq__update_msix_route(kvm, entry->gsi, &msi);
+		}
+	}
+}
+
 static void vfio_pci_cfg_read(struct kvm *kvm, struct pci_device_header *pci_hdr,
 			      u8 offset, void *data, int sz)
 {
@@ -151,6 +226,33 @@ static void vfio_pci_cfg_write(struct kvm *kvm, struct pci_device_header *pci_hd
 	if (pread(device->fd, base + offset, sz, info->offset + offset) != sz)
 		pr_warning("Failed to read %d bytes from Configuration Space at 0x%x",
 			   sz, offset);
+
+	if (pdev->msi.cap && offset >= pdev->msi.pos && offset <
+	    pdev->msi.pos + pdev->msi.size) {
+		vfio_pci_msi_write(kvm, device, offset - pdev->msi.pos, data,
+				   sz);
+	}
+}
+
+/*
+ * Size in bytes of an MSI capability, derived from its Message Control word:
+ * 10 bytes for the 32-bit layout, 4 more for the upper address, and 10 more
+ * (2 reserved, 4 mask, 4 pending) when per-vector masking is present.
+ *
+ * NOTE(review): this reads ->ctrl, so @cap_hdr must point at no less than the
+ * first 4 bytes of the capability, not just the 2-byte generic header. Verify
+ * what vfio_pci_add_cap() hands to vfio_pci_cap_size().
+ */
+static ssize_t vfio_pci_msi_cap_size(struct msi_cap_64 *cap_hdr)
+{
+	size_t size = 10;
+
+	if (cap_hdr->ctrl & PCI_MSI_FLAGS_64BIT)
+		size += 4;
+	if (cap_hdr->ctrl & PCI_MSI_FLAGS_MASKBIT)
+		size += 10;
+
+	return size;
 }
 
 static ssize_t vfio_pci_cap_size(struct pci_cap_hdr *cap_hdr)
@@ -158,6 +260,8 @@ static ssize_t vfio_pci_cap_size(struct pci_cap_hdr *cap_hdr)
 	switch (cap_hdr->type) {
 	case PCI_CAP_ID_MSIX:
 		return PCI_CAP_MSIX_SIZEOF;
+	case PCI_CAP_ID_MSI:
+		return vfio_pci_msi_cap_size((void *)cap_hdr);
 	default:
 		pr_err("unknown PCI capability %u", cap_hdr->type);
 		return 0;
@@ -231,9 +335,10 @@ static int vfio_pci_parse_caps(struct vfio_device *device)
 
 		switch (cap.type) {
 		case PCI_CAP_ID_MSIX:
+		case PCI_CAP_ID_MSI:
 			ret = vfio_pci_add_cap(device, &cap, info->offset, pos);
 			if (ret) {
-				pr_warning("Failed to read MSI-X capability structure");
+				pr_warning("Failed to read capability structure");
 				return ret;
 			}
 			break;
@@ -493,11 +598,35 @@ out_free:
 	return ret;
 }
 
+/*
+ * Record where the MSI capability sits within pdev->hdr and allocate one
+ * entry per vector the device advertises (Multiple Message Capable).
+ * Returns 0 on success, -ENOMEM if the entries cannot be allocated.
+ */
+static int vfio_pci_init_msi(struct kvm *kvm, struct vfio_pci_device *pdev,
+			     struct msi_cap_64 *cap)
+{
+	pdev->msi = (struct vfio_pci_msi) {
+		.nr_entries = 1 << ((cap->ctrl & PCI_MSI_FLAGS_QMASK) >> 1),
+		.cap = cap,
+		.pos = (void *)cap - (void *)&pdev->hdr,
+		.size = vfio_pci_cap_size((void *)cap),
+	};
+
+	pdev->msi.entries = calloc(pdev->msi.nr_entries,
+				   sizeof(struct vfio_pci_msix_entry));
+	if (!pdev->msi.entries)
+		return -ENOMEM;
+
+	return 0;
+}
+
 static int vfio_pci_configure_dev_regions(struct kvm *kvm,
 					  struct vfio_device *device)
 {
 	int ret;
 	struct msix_cap *msix;
+	struct msi_cap_64 *msi;
 	struct vfio_pci_device *pdev = &device->pci;
 	u32 i, num_regions = device->info.num_regions;
 
@@ -512,6 +641,13 @@ static int vfio_pci_configure_dev_regions(struct kvm *kvm,
 			return ret;
 	}
 
+	msi = pci_find_cap(&pdev->hdr, PCI_CAP_ID_MSI);
+	if (msi) {
+		ret = vfio_pci_init_msi(kvm, pdev, msi);
+		if (ret)
+			return ret;
+	}
+
 	/* First of all, map the BARs directly into the guest */
 	for (i = VFIO_PCI_BAR0_REGION_INDEX; i <= VFIO_PCI_BAR5_REGION_INDEX; ++i) {
 		struct vfio_region *region;
@@ -576,30 +712,28 @@ static int vfio_configure_dev_regions(struct kvm *kvm,
 	return vfio_pci_configure_dev_regions(kvm, device);
 }
 
-static int vfio_pci_init_msix_irqfd(struct kvm *kvm,
-				    struct vfio_device *device)
+static int vfio_pci_init_msi_irqfd(int devfd, size_t index, size_t nr_entries,
+				   struct vfio_pci_msix_entry *entries)
 {
 	int ret;
 	size_t i;
 	int *eventfds;
 	size_t irq_set_size;
 	struct vfio_irq_set *irq_set;
-	struct vfio_pci_msix_table *table = &device->pci.msix_table;
 
 	/*
-	 * We likely have VFIO_IRQ_INFO_NORESIZE for MSI-X, and we don't want to
+	 * We likely have VFIO_IRQ_INFO_NORESIZE for MSI/X, and we don't want to
 	 * enable/disable MSIs every time the guest requests a new one. Setup
 	 * IRQFD for all vectors upfront.
 	 *
-	 * We cannot start creating the MSI-X routes in KVM just now. First we
+	 * We cannot start creating the MSI routes in KVM just now. First we
 	 * need to wait for all devices to allocate their IRQ lines, and only
 	 * after that number is freezed will we be able to allocate MSI numbers.
 	 * A bit unfortunate (it would be much easier to handle initialization
 	 * errors here), but okay. Store eventfd until we're ready to create the
 	 * routes.
 	 */
-	irq_set_size = sizeof(struct vfio_irq_set) +
-		       table->nr_entries * sizeof(int);
+	irq_set_size = sizeof(struct vfio_irq_set) + nr_entries * sizeof(int);
 	irq_set = malloc(irq_set_size);
 	if (!irq_set)
 		return -ENOMEM;
@@ -607,13 +741,13 @@ static int vfio_pci_init_msix_irqfd(struct kvm *kvm,
 	*irq_set = (struct vfio_irq_set) {
 		.argsz = irq_set_size,
 		.flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
-		.index = VFIO_PCI_MSIX_IRQ_INDEX,
+		.index = index,
 		.start = 0,
-		.count = table->nr_entries,
+		.count = nr_entries,
 	};
 
 	eventfds = (void *)irq_set + sizeof(struct vfio_irq_set);
-	for (i = 0; i < table->nr_entries; i++) {
+	for (i = 0; i < nr_entries; i++) {
 		eventfds[i] = eventfd(0, 0);
 		if (eventfds[i] < 0) {
 			pr_err("cannot create eventfd (try to increase RLIMIT_NOFILE)");
@@ -621,11 +755,11 @@ static int vfio_pci_init_msix_irqfd(struct kvm *kvm,
 			goto out_free;
 		}
 
-		table->entries[i].gsi = -1;
-		table->entries[i].eventfd = eventfds[i];
+		entries[i].gsi = -1;
+		entries[i].eventfd = eventfds[i];
 	}
 
-	ret = ioctl(device->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+	ret = ioctl(devfd, VFIO_DEVICE_SET_IRQS, irq_set);
 	if (ret < 0)
 		pr_err("Cannot register vfio_irq_set");
 
@@ -714,10 +848,12 @@ static int vfio_configure_dev_irqs(struct kvm *kvm, struct vfio_device *device)
 	int ret;
 	struct vfio_pci_device *pdev = &device->pci;
 	struct msix_cap *msix = pci_find_cap(&pdev->hdr, PCI_CAP_ID_MSIX);
+	struct msi_cap_64 *msi = pci_find_cap(&pdev->hdr, PCI_CAP_ID_MSI);
 
 	device->irq_info = (struct vfio_irq_info) {
 		.argsz = sizeof(device->irq_info),
 		.index = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
+			 msi ? VFIO_PCI_MSI_IRQ_INDEX :
 			 VFIO_PCI_INTX_IRQ_INDEX,
 	};
 
@@ -738,7 +874,20 @@ static int vfio_configure_dev_irqs(struct kvm *kvm, struct vfio_device *device)
 			return -EINVAL;
 		}
 
-		ret = vfio_pci_init_msix_irqfd(kvm, device);
+		ret = vfio_pci_init_msi_irqfd(device->fd,
+					      VFIO_PCI_MSIX_IRQ_INDEX,
+					      pdev->msix_table.nr_entries,
+					      pdev->msix_table.entries);
+	} else if (msi) {
+		if (device->irq_info.count != pdev->msi.nr_entries) {
+			pr_err("Invalid number of MSIs reported by VFIO");
+			return -EINVAL;
+		}
+
+		ret = vfio_pci_init_msi_irqfd(device->fd,
+					      VFIO_PCI_MSI_IRQ_INDEX,
+					      pdev->msi.nr_entries,
+					      pdev->msi.entries);
 	} else {
 		int gsi = pdev->hdr.irq_line - KVM_IRQ_OFFSET;
 
-- 
2.12.1