When a device has MSI capability but not MSI-X, use it. This patch is untested. Consider it broken unless proven otherwise. Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@xxxxxxx> --- include/kvm/pci.h | 23 +++++++++ vfio/pci.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 167 insertions(+), 2 deletions(-) diff --git a/include/kvm/pci.h b/include/kvm/pci.h index 44e5adff..c5fc8254 100644 --- a/include/kvm/pci.h +++ b/include/kvm/pci.h @@ -59,6 +59,29 @@ struct msix_cap { u32 pba_offset; }; +struct msi_cap_64 { + u8 cap; + u8 next; + u16 ctrl; + u32 address_lo; + u32 address_hi; + u16 data; + u16 _align; + u32 mask_bits; + u32 pend_bits; +}; + +struct msi_cap_32 { + u8 cap; + u8 next; + u16 ctrl; + u32 address_lo; + u16 data; + u16 _align; + u32 mask_bits; + u32 pend_bits; +}; + struct pci_cap_hdr { u8 type; u8 next; diff --git a/vfio/pci.c b/vfio/pci.c index a273fabb..9e45d30b 100644 --- a/vfio/pci.c +++ b/vfio/pci.c @@ -297,6 +297,112 @@ static void vfio_pci_msix_cap_write(struct kvm *kvm, mutex_unlock(&pdev->msi.mutex); } +static int vfio_pci_msi_vector_write(struct kvm *kvm, struct vfio_device *vdev, + u8 off, u8 *data, u32 sz) +{ + size_t i; + u32 mask = 0; + size_t mask_pos, start, limit; + struct vfio_pci_msi_entry *entry; + struct vfio_pci_device *pdev = &vdev->pci; + struct msi_cap_64 *msi_cap_64 = (void *)&pdev->hdr + pdev->msi.pos; + + if (!(msi_cap_64->ctrl & PCI_MSI_FLAGS_MASKBIT)) + return 0; + + if (msi_cap_64->ctrl & PCI_MSI_FLAGS_64BIT) + mask_pos = PCI_MSI_MASK_64; + else + mask_pos = PCI_MSI_MASK_32; + + if (off >= mask_pos + 4 || off + sz <= mask_pos) + return 0; + + /* Set mask to current state */ + for (i = 0; i < pdev->msi.nr_entries; i++) { + entry = &pdev->msi.entries[i]; + mask |= !!msi_is_masked(entry->virt_state) << i; + } + + /* Update mask following the intersection of access and register */ + start = max_t(size_t, off, mask_pos); + limit = min_t(size_t, off + sz, mask_pos + 4); + + 
memcpy((void *)&mask + start - mask_pos, data + start - off,
+ limit - start);
+
+ /* Update states if necessary */
+ for (i = 0; i < pdev->msi.nr_entries; i++) {
+ bool masked = mask & (1 << i);
+
+ entry = &pdev->msi.entries[i];
+ if (masked != msi_is_masked(entry->virt_state)) {
+ msi_set_masked(entry->virt_state, masked);
+ vfio_pci_update_msi_entry(kvm, vdev, entry);
+ }
+ }
+
+ return 1;
+}
+
+static void vfio_pci_msi_cap_write(struct kvm *kvm, struct vfio_device *vdev,
+ u8 off, u8 *data, u32 sz)
+{
+ u8 ctrl;
+ struct msi_msg msg;
+ size_t i, nr_vectors;
+ struct vfio_pci_msi_entry *entry;
+ struct vfio_pci_device *pdev = &vdev->pci;
+ struct msi_cap_64 *msi_cap_64 = (void *)&pdev->hdr + pdev->msi.pos;
+
+ off -= pdev->msi.pos;
+
+ /* Check if the guest is trying to update mask bits */
+ if (vfio_pci_msi_vector_write(kvm, vdev, off, data, sz))
+ return;
+
+ /* Only modify routes when guest pokes the enable bit */
+ if (off > PCI_MSI_FLAGS || off + sz <= PCI_MSI_FLAGS)
+ return;
+
+ ctrl = *(u8 *)(data + PCI_MSI_FLAGS - off);
+
+ mutex_lock(&pdev->msi.mutex);
+
+ msi_set_enabled(pdev->msi.virt_state, ctrl & PCI_MSI_FLAGS_ENABLE);
+
+ if (!msi_is_enabled(pdev->msi.virt_state)) {
+ vfio_pci_disable_msis(kvm, vdev);
+ mutex_unlock(&pdev->msi.mutex);
+ return;
+ }
+
+ /* Create routes for the requested vectors (clamp to nr_entries so a bogus guest QSIZE cannot overrun entries[]) */
+ nr_vectors = min_t(size_t, pdev->msi.nr_entries, 1 << ((ctrl & PCI_MSI_FLAGS_QSIZE) >> 4));
+
+ msg.address_lo = msi_cap_64->address_lo;
+ if (msi_cap_64->ctrl & PCI_MSI_FLAGS_64BIT) {
+ msg.address_hi = msi_cap_64->address_hi;
+ msg.data = msi_cap_64->data;
+ } else {
+ struct msi_cap_32 *msi_cap_32 = (void *)msi_cap_64;
+ msg.address_hi = 0;
+ msg.data = msi_cap_32->data;
+ }
+
+ for (i = 0; i < nr_vectors; i++) {
+ entry = &pdev->msi.entries[i];
+ entry->config.msg = msg;
+ vfio_pci_update_msi_entry(kvm, vdev, entry);
+ }
+
+ /* Update the physical capability if necessary */
+ if (vfio_pci_enable_msis(kvm, vdev))
+ dev_err(vdev, "cannot enable MSI");
+
+ 
mutex_unlock(&pdev->msi.mutex); +} + static void vfio_pci_cfg_read(struct kvm *kvm, struct pci_device_header *pci_hdr, u8 offset, void *data, int sz) { @@ -333,6 +439,9 @@ static void vfio_pci_cfg_write(struct kvm *kvm, struct pci_device_header *pci_hd /* Handle MSI write now, since it might update the hardware capability */ switch (pdev->irq_type) { + case VFIO_PCI_IRQ_MSI: + vfio_pci_msi_cap_write(kvm, vdev, offset, data, sz); + break; case VFIO_PCI_IRQ_MSIX: vfio_pci_msix_cap_write(kvm, vdev, offset, data, sz); break; @@ -345,11 +454,25 @@ static void vfio_pci_cfg_write(struct kvm *kvm, struct pci_device_header *pci_hd sz, offset); } +static ssize_t vfio_pci_msi_cap_size(struct msi_cap_64 *cap_hdr) +{ + size_t size = 10; + + if (cap_hdr->ctrl & PCI_MSI_FLAGS_64BIT) + size += 4; + if (cap_hdr->ctrl & PCI_MSI_FLAGS_MASKBIT) + size += 10; + + return size; +} + static ssize_t vfio_pci_cap_size(struct pci_cap_hdr *cap_hdr) { switch (cap_hdr->type) { case PCI_CAP_ID_MSIX: return PCI_CAP_MSIX_SIZEOF; + case PCI_CAP_ID_MSI: + return vfio_pci_msi_cap_size((void *)cap_hdr); default: pr_err("unknown PCI capability 0x%x", cap_hdr->type); return 0; @@ -423,6 +546,7 @@ static int vfio_pci_parse_caps(struct vfio_device *vdev) switch (cap.type) { case PCI_CAP_ID_MSIX: + case PCI_CAP_ID_MSI: ret = vfio_pci_add_cap(vdev, &cap, info->offset, pos); if (ret) { dev_warn(vdev, "failed to read capability structure %x", @@ -431,7 +555,8 @@ static int vfio_pci_parse_caps(struct vfio_device *vdev) } pdev->msi.pos = pos; - pdev->irq_type = VFIO_PCI_IRQ_MSIX; + pdev->irq_type = cap.type == PCI_CAP_ID_MSIX ? 
+ VFIO_PCI_IRQ_MSIX : VFIO_PCI_IRQ_MSI;
 break;
 
 /* Any other capability is hidden */
@@ -646,6 +771,19 @@ out_free:
 return ret;
 }
 
+static int vfio_pci_create_msi_cap(struct kvm *kvm, struct vfio_pci_device *pdev)
+{
+ struct msi_cap_64 *cap = (void *)&pdev->hdr + pdev->msi.pos;
+
+ pdev->msi.nr_entries = 1 << ((cap->ctrl & PCI_MSI_FLAGS_QMASK) >> 1);
+ pdev->msi.entries = calloc(pdev->msi.nr_entries,
+ sizeof(struct vfio_pci_msi_entry));
+ if (!pdev->msi.entries)
+ return -ENOMEM;
+
+ return 0;
+}
+
 static int vfio_pci_configure_dev_regions(struct kvm *kvm,
 struct vfio_device *vdev)
 {
@@ -662,6 +800,9 @@
 case VFIO_PCI_IRQ_MSIX:
 ret = vfio_pci_create_msix_table(kvm, pdev);
 break;
+ case VFIO_PCI_IRQ_MSI:
+ ret = vfio_pci_create_msi_cap(kvm, pdev);
+ break;
 default:
 break;
 }
@@ -900,7 +1041,8 @@ static int vfio_pci_configure_dev_irqs(struct kvm *kvm, struct vfio_device *vdev
 return -EINVAL;
 }
 
- if (pdev->irq_type == VFIO_PCI_IRQ_MSIX) {
+ if (pdev->irq_type == VFIO_PCI_IRQ_MSIX ||
+ pdev->irq_type == VFIO_PCI_IRQ_MSI) {
 if (vdev->irq_info.count != pdev->msi.nr_entries) {
 dev_err(vdev, "invalid number of MSIs reported by VFIO");
 return -EINVAL;
-- 
2.13.1