This patch emulates the MSI-X per-vector mask bit on assigned devices. Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx> --- hw/device-assignment.c | 167 +++++++++++++++++++++++++++++++++++++++++++++--- hw/device-assignment.h | 2 +- 2 files changed, 158 insertions(+), 11 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 5fe4a55..44b9d43 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -62,6 +62,11 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev); static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev); +static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn) +{ + return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn; +} + static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, uint32_t addr, int len, uint32_t *val) { @@ -264,6 +269,10 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, AssignedDevRegion *region = &r_dev->v_addrs[region_num]; PCIRegion *real_region = &r_dev->real_device.regions[region_num]; int ret = 0; +#ifdef KVM_CAP_MSIX_MASK + int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MASK); + struct kvm_msix_mmio msix_mmio; +#endif DEBUG("e_phys=%08" FMT_PCIBUS " r_virt=%p type=%d len=%08" FMT_PCIBUS " region_num=%d \n", e_phys, region->u.r_virtbase, type, e_size, region_num); @@ -282,6 +291,21 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, cpu_register_physical_memory(e_phys + offset, TARGET_PAGE_SIZE, r_dev->mmio_index); + +#ifdef KVM_CAP_MSIX_MASK + if (cap_mask) { + memset(&msix_mmio, 0, sizeof msix_mmio); + msix_mmio.id = calc_assigned_dev_id(r_dev->h_segnr, + r_dev->h_busnr, r_dev->h_devfn); + msix_mmio.type = KVM_MSIX_TYPE_ASSIGNED_DEV; + msix_mmio.base_addr = e_phys + offset; + msix_mmio.max_entries_nr = r_dev->max_msix_entries_nr; + msix_mmio.flags = KVM_MSIX_MMIO_FLAG_REGISTER; + ret = kvm_update_msix_mmio(kvm_context, &msix_mmio); + if (ret) + fprintf(stderr, "fail to 
register in-kernel msix_mmio!\n"); + } +#endif } } @@ -824,11 +848,6 @@ static void free_assigned_device(AssignedDevice *dev) } } -static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn) -{ - return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn; -} - static void assign_failed_examine(AssignedDevice *dev) { char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns; @@ -1125,6 +1144,34 @@ static int get_msix_entries_max_nr(AssignedDevice *adev) return entries_max_nr; } +static int assigned_dev_msix_entry_masked(AssignedDevice *adev, int entry) +{ + uint32_t msg_ctrl; + void *va = adev->msix_table_page; +#ifdef KVM_CAP_MSIX_MASK + int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MASK); + struct kvm_msix_entry msix_entry; + int r; + + if (cap_mask) { + memset(&msix_entry, 0, sizeof msix_entry); + msix_entry.id = calc_assigned_dev_id(adev->h_segnr, + adev->h_busnr, (uint8_t)adev->h_devfn); + msix_entry.type = KVM_MSIX_TYPE_ASSIGNED_DEV; + msix_entry.entry = entry; + msix_entry.query_flags = KVM_MSIX_FLAG_QUERY_MASKBIT; + r = kvm_get_msix_entry(kvm_context, &msix_entry); + if (r) { + DEBUG("Fail to get mask bit of entry %d\n", entry); + return 1; + } + return (msix_entry.flags & KVM_MSIX_FLAG_MASKBIT); + } +#endif + memcpy(&msg_ctrl, va + entry * 16 + 12, 4); + return (msg_ctrl & PCI_MSIX_CTRL_MASKBIT); +} + static int get_msix_valid_entries_nr(AssignedDevice *adev, uint16_t entries_max_nr) { @@ -1137,7 +1184,7 @@ static int get_msix_valid_entries_nr(AssignedDevice *adev, for (i = 0; i < entries_max_nr; i++) { memcpy(&msg_ctrl, va + i * 16 + 12, 4); /* Ignore unused entry even it's unmasked */ - if (msg_ctrl & PCI_MSIX_CTRL_MASKBIT) + if (assigned_dev_msix_entry_masked(adev, i)) continue; entries_nr ++; } @@ -1166,6 +1213,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev, } free_dev_irq_entries(adev); + memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV * + sizeof(*pci_dev->msix_entry_used)); 
adev->irq_entries_nr = entries_nr; adev->entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry)); if (!adev->entry) { @@ -1179,7 +1228,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev, if (entries_nr >= msix_nr.entry_nr) break; memcpy(&msg_ctrl, va + i * 16 + 12, 4); - if (msg_ctrl & PCI_MSIX_CTRL_MASKBIT) + if (assigned_dev_msix_entry_masked(adev, i)) continue; memcpy(&msg_data, va + i * 16 + 8, 4); @@ -1201,6 +1250,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev, msix_entry.gsi = adev->entry[entries_nr].gsi; msix_entry.entry = i; + pci_dev->msix_entry_used[i] = 1; r = kvm_assign_set_msix_entry(kvm_context, &msix_entry); if (r) { fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r)); @@ -1244,6 +1294,8 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, int enable_msix) perror("assigned_dev_update_msix: deassign irq"); assigned_dev->irq_requested_type = 0; + memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV * + sizeof(*pci_dev->msix_entry_used)); } entries_max_nr = get_msix_entries_max_nr(assigned_dev); @@ -1251,10 +1303,12 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, int enable_msix) fprintf(stderr, "assigned_dev_update_msix: MSI-X entries_max_nr == 0"); return; } + /* + * Guest may try to enable MSI-X before setting MSI-X entry done, so + * let's wait until guest unmask the entries. 
+ */ entries_nr = get_msix_valid_entries_nr(assigned_dev, entries_max_nr); if (entries_nr == 0) { - if (enable_msix) - fprintf(stderr, "MSI-X entry number is zero!\n"); return; } if (enable_msix) { @@ -1298,7 +1352,8 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t ad if (address <= ctrl_pos && address + len > ctrl_pos) { ctrl_pos--; /* control is word long */ ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos); - assigned_dev_update_msix(pci_dev, (*ctrl_word & PCI_MSIX_ENABLE)); + assigned_dev_update_msix(pci_dev, + (*ctrl_word & PCI_MSIX_ENABLE) && !(*ctrl_word & PCI_MSIX_MASK)); } pos += PCI_CAPABILITY_CONFIG_MSIX_LENGTH; } @@ -1352,6 +1407,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) bar_nr = msix_table_entry & PCI_MSIX_BIR; msix_table_entry &= ~PCI_MSIX_BIR; dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry; + dev->max_msix_entries_nr = get_msix_entries_max_nr(dev); if (next_cap_pt != 0) { pci_dev->config[pci_dev->cap.start + next_cap_pt] = pci_dev->cap.start + pci_dev->cap.length; @@ -1396,10 +1452,97 @@ static void msix_mmio_writel(void *opaque, AssignedDevice *adev = opaque; unsigned int offset = addr & 0xfff; void *page = adev->msix_table_page; + int pos, ctrl_word, index; + struct kvm_irq_routing_entry new_entry = {}; + int entry_idx, entries_max_nr, r = 0, i; + uint32_t msg_ctrl, msg_data, msg_upper_addr, msg_addr; DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n", addr, val); memcpy((void *)((char *)page + offset), &val, 4); + + index = offset / 16; + + /* Check if mask bit is being accessed */ + memcpy(&msg_addr, (char *)page + index * 16, 4); + memcpy(&msg_upper_addr, (char *)page + index * 16 + 4, 4); + memcpy(&msg_data, (char *)page + index * 16 + 8, 4); + memcpy(&msg_ctrl, (char *)page + index * 16 + 12, 4); + DEBUG("MSI-X entries index %d: " + "msg_addr 0x%x, msg_upper_addr 0x%x, msg_data 0x%x, vec_ctl 0x%x\n", + index, msg_addr, msg_upper_addr, msg_data, 
msg_ctrl); + + if (adev->cap.available & ASSIGNED_DEVICE_CAP_MSI) + pos = adev->dev.cap.start + PCI_CAPABILITY_CONFIG_MSI_LENGTH; + else + pos = adev->dev.cap.start; + + ctrl_word = *(uint16_t *)(adev->dev.config + pos + 2); + + if (!((ctrl_word & PCI_MSIX_ENABLE) && !(ctrl_word & PCI_MSIX_MASK))) + return; + + if (!assigned_dev_msix_entry_masked(adev, index)) { + if (!adev->dev.msix_entry_used[index]) { + DEBUG("Try to modify unenabled MSI-X entry %d's mask. " + "Reenable MSI-X.\n", + index); + assigned_dev_update_msix(&adev->dev, 1); + } + return; + } + + if (!adev->dev.msix_entry_used[index]) + return; + + /* + * We're here only because guest want to modify MSI data/addr, and + * kernel would filter those writing with mask bit unset. + */ + entries_max_nr = get_msix_entries_max_nr(adev); + + /* + * Find the index of routing entry, it can be different from 'index' if + * empty entry existed in between + */ + entry_idx = -1; + for (i = 0; i <= index; i++) { + if (adev->dev.msix_entry_used[i]) + entry_idx ++; + } + if (entry_idx >= entries_max_nr || entry_idx == -1) { + fprintf(stderr, "msix_mmio_writel: Entry idx %d exceed limit!\n", + entry_idx); + return; + } + + if (!assigned_dev_msix_entry_masked(adev, index)) { + fprintf(stderr, "msix_mmio_writel: Trying write to unmasked entry!\n"); + return; + } + + new_entry.gsi = adev->entry[entry_idx].gsi; + new_entry.type = KVM_IRQ_ROUTING_MSI; + new_entry.flags = 0; + new_entry.u.msi.address_lo = msg_addr; + new_entry.u.msi.address_hi = msg_upper_addr; + new_entry.u.msi.data = msg_data; + if (memcmp(&adev->entry[entry_idx].u.msi, &new_entry.u.msi, + sizeof new_entry.u.msi)) { + r = kvm_update_routing_entry(&adev->entry[entry_idx], &new_entry); + if (r) { + perror("msix_mmio_writel: kvm_update_routing_entry failed\n"); + return; + } + r = kvm_commit_irq_routes(); + if (r) { + perror("msix_mmio_writel: kvm_commit_irq_routes failed\n"); + return; + } + } + adev->entry[entry_idx].u.msi.address_lo = msg_addr; + 
adev->entry[entry_idx].u.msi.address_hi = msg_upper_addr; + adev->entry[entry_idx].u.msi.data = msg_data; } static void msix_mmio_writew(void *opaque, @@ -1440,6 +1583,8 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev) *(uint32_t *)(dev->msix_table_page + i + 0xc) = 1; dev->mmio_index = cpu_register_io_memory( msix_mmio_read, msix_mmio_write, dev); + dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV * + sizeof *dev->dev.msix_entry_used); return 0; } @@ -1456,6 +1601,8 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev) strerror(errno)); } dev->msix_table_page = NULL; + free(dev->dev.msix_entry_used); + dev->dev.msix_entry_used = NULL; } static int assigned_initfn(struct PCIDevice *pci_dev) diff --git a/hw/device-assignment.h b/hw/device-assignment.h index 2f5fa17..17c85c4 100644 --- a/hw/device-assignment.h +++ b/hw/device-assignment.h @@ -98,7 +98,7 @@ typedef struct AssignedDevice { #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2) uint32_t state; } cap; - int irq_entries_nr; + int irq_entries_nr, max_msix_entries_nr; struct kvm_irq_routing_entry *entry; void *msix_table_page; target_phys_addr_t msix_table_addr; -- 1.7.0.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html