On Thursday 16 December 2010 16:21:52 Sheng Yang wrote: > The old MSI-X enabling method assume the entries are written before MSI-X > enabled, but some OS didn't obey this, e.g. FreeBSD. This patch would fix > this. > > Also, according to the PCI spec, mask bit of MSI-X table should be set > after reset. Ping? -- regards Yang, Sheng > > Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx> > --- > hw/device-assignment.c | 188 > +++++++++++++++++++++++++++++++++++++++++------- hw/device- assignment.h | > 2 +- > 2 files changed, 162 insertions(+), 28 deletions(-) > > diff --git a/hw/device-assignment.c b/hw/device-assignment.c > index 832c236..ed0b491 100644 > --- a/hw/device-assignment.c > +++ b/hw/device-assignment.c > @@ -1111,15 +1111,12 @@ static void assigned_dev_update_msi(PCIDevice > *pci_dev, unsigned int ctrl_pos) #endif > > #ifdef KVM_CAP_DEVICE_MSIX > -static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) > + > +#define PCI_MSIX_CTRL_MASKBIT 1ul > +static int get_msix_entries_max_nr(AssignedDevice *adev) > { > - AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev); > - uint16_t entries_nr = 0, entries_max_nr; > - int pos = 0, i, r = 0; > - uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl; > - struct kvm_assigned_msix_nr msix_nr; > - struct kvm_assigned_msix_entry msix_entry; > - void *va = adev->msix_table_page; > + int pos, entries_max_nr; > + PCIDevice *pci_dev = &adev->dev; > > pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX); > > @@ -1127,20 +1124,48 @@ static int assigned_dev_update_msix_mmio(PCIDevice > *pci_dev) entries_max_nr &= PCI_MSIX_TABSIZE; > entries_max_nr += 1; > > + return entries_max_nr; > +} > + > +static int assigned_dev_msix_entry_masked(AssignedDevice *adev, int entry) > +{ > + uint32_t msg_ctrl; > + void *va = adev->msix_table_page; > + > + memcpy(&msg_ctrl, va + entry * 16 + 12, 4); > + return (msg_ctrl & PCI_MSIX_CTRL_MASKBIT); > +} > + > +static int get_msix_valid_entries_nr(AssignedDevice *adev, > + uint16_t entries_max_nr) > +{ > + void *va = adev->msix_table_page; > + uint32_t msg_ctrl; > + uint16_t entries_nr = 0; > + int i; > + > /* Get the usable entry number for allocating */ > for (i = 0; i < entries_max_nr; i++) { > memcpy(&msg_ctrl, va + i * 16 + 12, 4); > - memcpy(&msg_data, va + i * 16 + 8, 4); > /* Ignore unused entry even it's unmasked */ > - if (msg_data == 0) > + if (assigned_dev_msix_entry_masked(adev, i)) > continue; > entries_nr ++; > } > + return entries_nr; > +} > + > +static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev, > + uint16_t entries_nr, > + uint16_t entries_max_nr) > +{ > + AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev); > + int i, r = 0; > + uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl; > + struct kvm_assigned_msix_nr msix_nr; > + struct kvm_assigned_msix_entry msix_entry; > + void *va = adev->msix_table_page; > > - if (entries_nr == 0) { > - fprintf(stderr, "MSI-X entry number is zero!\n"); > - return -EINVAL; > - } > msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, > adev->h_busnr, (uint8_t)adev->h_devfn); msix_nr.entry_nr = entries_nr; > @@ -1152,6 +1177,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice > *pci_dev) } > > free_dev_irq_entries(adev); > + memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV * > + > sizeof(*pci_dev->msix_entry_used)); adev->irq_entries_nr = entries_nr; > adev->entry = calloc(entries_nr, sizeof(struct > kvm_irq_routing_entry)); if (!adev->entry) { > @@ -1165,10 +1192,10 @@ static int assigned_dev_update_msix_mmio(PCIDevice > *pci_dev) if (entries_nr >= msix_nr.entry_nr) > break; > memcpy(&msg_ctrl, va + i * 16 + 12, 4); > - memcpy(&msg_data, va + i * 16 + 8, 4); > - if (msg_data == 0) > + if (assigned_dev_msix_entry_masked(adev, i)) > continue; > > + memcpy(&msg_data, va + i * 16 + 8, 4); > memcpy(&msg_addr, va + i * 16, 4); > memcpy(&msg_upper_addr, va + i * 16 + 4, 4); > > @@ -1182,17 +1209,18 @@ static int assigned_dev_update_msix_mmio(PCIDevice > *pci_dev) adev->entry[entries_nr].u.msi.address_lo = msg_addr; > adev->entry[entries_nr].u.msi.address_hi = msg_upper_addr; > adev->entry[entries_nr].u.msi.data = msg_data; > - DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data, > msg_addr); - kvm_add_routing_entry(&adev->entry[entries_nr]); > + DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x!\n", msg_data, > msg_addr); + kvm_add_routing_entry(&adev->entry[entries_nr]); > > msix_entry.gsi = adev->entry[entries_nr].gsi; > msix_entry.entry = i; > + pci_dev->msix_entry_used[i] = 1; > r = kvm_assign_set_msix_entry(kvm_context, &msix_entry); > if (r) { > fprintf(stderr, "fail to set MSI-X entry! %s\n", > strerror(-r)); break; > } > - DEBUG("MSI-X entry gsi 0x%x, entry %d\n!", > + DEBUG("MSI-X entry gsi 0x%x, entry %d!\n", > msix_entry.gsi, msix_entry.entry); > entries_nr ++; > } > @@ -1209,20 +1237,24 @@ static void assigned_dev_update_msix(PCIDevice > *pci_dev, unsigned int ctrl_pos) { > struct kvm_assigned_irq assigned_irq_data; > AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, > dev); - uint16_t *ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos); > + uint16_t ctrl_word = *(uint16_t *)(pci_dev->config + ctrl_pos); int > r; > + uint16_t entries_nr, entries_max_nr; > + int enable_msix; > > memset(&assigned_irq_data, 0, sizeof assigned_irq_data); > assigned_irq_data.assigned_dev_id = > calc_assigned_dev_id(assigned_dev->h_segnr, > assigned_dev->h_busnr, (uint8_t)assigned_dev->h_devfn); > > + enable_msix = ((ctrl_word & PCI_MSIX_ENABLE) && > + !(ctrl_word & PCI_MSIX_MASK)); > + > /* Some guests gratuitously disable MSIX even if they're not using it, > * try to catch this by only deassigning irqs if the guest is using > * MSIX or intends to start. */ > if ((assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSIX) || > - (*ctrl_word & PCI_MSIX_ENABLE)) { > - > + enable_msix) { > assigned_irq_data.flags = assigned_dev->irq_requested_type; > free_dev_irq_entries(assigned_dev); > r = kvm_deassign_irq(kvm_context, &assigned_irq_data); > @@ -1231,16 +1263,30 @@ static void assigned_dev_update_msix(PCIDevice > *pci_dev, unsigned int ctrl_pos) perror("assigned_dev_update_msix: > deassign irq"); > > assigned_dev->irq_requested_type = 0; > + memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV * > + > sizeof(*pci_dev->msix_entry_used)); } > > - if (*ctrl_word & PCI_MSIX_ENABLE) { > - assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX | > - KVM_DEV_IRQ_GUEST_MSIX; > - > - if (assigned_dev_update_msix_mmio(pci_dev) < 0) { > + entries_max_nr = assigned_dev->max_msix_entries_nr; > + if (entries_max_nr == 0) { > + fprintf(stderr, "assigned_dev_update_msix: MSI-X entries_max_nr == > 0"); + return; > + } > + /* > + * Guest may try to enable MSI-X before setting MSI-X entry done, so > + * let's wait until guest unmask the entries. > + */ > + entries_nr = get_msix_valid_entries_nr(assigned_dev, entries_max_nr); > + if (entries_nr == 0) > + return; > + if (enable_msix) { > + if (assigned_dev_update_msix_mmio(pci_dev, > + entries_nr, entries_max_nr) < 0) { > perror("assigned_dev_update_msix_mmio"); > return; > } > + assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX | > + KVM_DEV_IRQ_GUEST_MSIX; > if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) { > perror("assigned_dev_enable_msix: assign irq"); > return; > @@ -1341,6 +1387,7 @@ static int assigned_device_pci_cap_init(PCIDevice > *pci_dev) bar_nr = msix_table_entry & PCI_MSIX_BIR; > msix_table_entry &= ~PCI_MSIX_BIR; > dev->msix_table_addr = pci_region[bar_nr].base_addr + > msix_table_entry; + dev->max_msix_entries_nr = > get_msix_entries_max_nr(dev); > } > #endif > #endif > @@ -1378,10 +1425,90 @@ static void msix_mmio_writel(void *opaque, > AssignedDevice *adev = opaque; > unsigned int offset = addr & 0xfff; > void *page = adev->msix_table_page; > + int ctrl_word, index; > + struct kvm_irq_routing_entry new_entry = {}; > + int entry_idx, entries_max_nr, r = 0, i; > + uint32_t msg_ctrl, msg_data, msg_upper_addr, msg_addr; > + struct PCIDevice *pci_dev = &adev->dev; > + uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX); > > DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n", > addr, val); > memcpy((void *)((char *)page + offset), &val, 4); > + > + index = offset / 16; > + > + /* Check if mask bit is being accessed */ > + memcpy(&msg_addr, (char *)page + index * 16, 4); > + memcpy(&msg_upper_addr, (char *)page + index * 16 + 4, 4); > + memcpy(&msg_data, (char *)page + index * 16 + 8, 4); > + memcpy(&msg_ctrl, (char *)page + index * 16 + 12, 4); > + DEBUG("MSI-X entries index %d: " > + "msg_addr 0x%x, msg_upper_addr 0x%x, msg_data 0x%x, vec_ctl > 0x%x\n", + index, msg_addr, msg_upper_addr, msg_data, > msg_ctrl); + > + ctrl_word = pci_get_word(pci_dev->config + cap + PCI_MSIX_FLAGS); > + > + if (!((ctrl_word & PCI_MSIX_ENABLE) && !(ctrl_word & PCI_MSIX_MASK))) > + return; > + > + if (!assigned_dev_msix_entry_masked(adev, index)) { > + if (!adev->dev.msix_entry_used[index]) { > + DEBUG("Try to modify unenabled MSI-X entry %d's mask. " > + "Reenable MSI-X.\n", > + index); > + assigned_dev_update_msix(&adev->dev, cap + PCI_MSIX_FLAGS); > + } > + return; > + } > + > + if (!adev->dev.msix_entry_used[index]) > + return; > + > + entries_max_nr = adev->max_msix_entries_nr; > + > + /* > + * Find the index of routing entry, it can be different from 'index' > if + * empty entry existed in between > + */ > + entry_idx = -1; > + for (i = 0; i <= index; i++) { > + if (adev->dev.msix_entry_used[i]) > + entry_idx ++; > + } > + if (entry_idx >= entries_max_nr || entry_idx == -1) { > + fprintf(stderr, "msix_mmio_writel: Entry idx %d exceed limit!\n", > + entry_idx); > + return; > + } > + > + if (!assigned_dev_msix_entry_masked(adev, index)) { > + fprintf(stderr, "msix_mmio_writel: Trying write to unmasked > entry!\n"); + return; > + } > + > + new_entry.gsi = adev->entry[entry_idx].gsi; > + new_entry.type = KVM_IRQ_ROUTING_MSI; > + new_entry.flags = 0; > + new_entry.u.msi.address_lo = msg_addr; > + new_entry.u.msi.address_hi = msg_upper_addr; > + new_entry.u.msi.data = msg_data; > + if (memcmp(&adev->entry[entry_idx].u.msi, &new_entry.u.msi, > + sizeof new_entry.u.msi)) { > + r = kvm_update_routing_entry(&adev->entry[entry_idx], &new_entry); > + if (r) { > + perror("msix_mmio_writel: kvm_update_routing_entry failed\n"); > + return; > + } > + r = kvm_commit_irq_routes(); > + if (r) { > + perror("msix_mmio_writel: kvm_commit_irq_routes failed\n"); > + return; > + } > + } > + adev->entry[entry_idx].u.msi.address_lo = msg_addr; > + adev->entry[entry_idx].u.msi.address_hi = msg_upper_addr; > + adev->entry[entry_idx].u.msi.data = msg_data; > } > > static void msix_mmio_writew(void *opaque, > @@ -1408,6 +1535,7 @@ static CPUReadMemoryFunc *msix_mmio_read[] = { > > static int assigned_dev_register_msix_mmio(AssignedDevice *dev) > { > + int i; > dev->msix_table_page = mmap(NULL, 0x1000, > PROT_READ|PROT_WRITE, > MAP_ANONYMOUS|MAP_PRIVATE, 0, 0); > @@ -1417,8 +1545,12 @@ static int > assigned_dev_register_msix_mmio(AssignedDevice *dev) return -EFAULT; > } > memset(dev->msix_table_page, 0, 0x1000); > + for (i = 0; i < 0x1000; i += 0x10) > + *(uint32_t *)(dev->msix_table_page + i + 0xc) = 1; > dev->mmio_index = cpu_register_io_memory( > msix_mmio_read, msix_mmio_write, dev); > + dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV * > + sizeof *dev->dev.msix_entry_used); > return 0; > } > > @@ -1435,6 +1567,8 @@ static void > assigned_dev_unregister_msix_mmio(AssignedDevice *dev) strerror(errno)); > } > dev->msix_table_page = NULL; > + free(dev->dev.msix_entry_used); > + dev->dev.msix_entry_used = NULL; > } > > static const VMStateDescription vmstate_assigned_device = { > diff --git a/hw/device-assignment.h b/hw/device-assignment.h > index c94a730..754e5c0 100644 > --- a/hw/device-assignment.h > +++ b/hw/device-assignment.h > @@ -104,7 +104,7 @@ typedef struct AssignedDevice { > #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2) > uint32_t state; > } cap; > - int irq_entries_nr; > + int irq_entries_nr, max_msix_entries_nr; > struct kvm_irq_routing_entry *entry; > void *msix_table_page; > target_phys_addr_t msix_table_addr; -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html