On Wed, Feb 01, 2012 at 06:48:32AM -0700, Alex Williamson wrote: > On Wed, 2012-02-01 at 09:22 +0200, Michael S. Tsirkin wrote: > > On Tue, Jan 31, 2012 at 10:33:14PM -0700, Alex Williamson wrote: > > > When a guest enables MSI-X in PCI configuration space we walk > > > through the MSI-X vector table trying to guess what might get > > > used. We have to guess because we don't do anything with > > > writes to the vector table, so we look for clues ahead of time > > > to pre-enable the vectors we think will be used. This means > > > that instead of doing the sane thing and test the mask bit, we > > > test whether the data field contains a non-zero value. It's > > > amazing this works as well as it does. > > > > > > However, two key things missed by doing this is that we don't > > > catch vector changes after enabling (ex. setting smp_affinity > > > on an irq) and we don't support guests that don't touch the > > > vector table prior to enabling the MSI-X capability (ex. > > > freebsd). This patch fixes both of these problems. > > > > > > NB we're not actually masking vectors yet with this patch as > > > it's unclear whether we really have the ability to do this > > > without losing interrupts. > > > > > > Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx> > > > > Overall I think this is better than what we have now. > > One question though: would it make sense to use msix mask notifiers > > instead of parsing msix tables on your own? > > Based on vfio, I think some minor changes would be necessary to the msix > core to port to that infrastructure. I don't think I'd want to use this > code to push those changes since it likely won't get ported to qemu.git. > The parsing is pretty trivial now that we're not counting bytes anyway. > Thanks, > > Alex Well the notifiers are a small bit of code, so I can see myself porting them forward even if there are no users in qemu.git. Let me know if you think it makes sense. > > > > --- > > > > > > hw/device-assignment.c | 92 ++++++++++++++++++++++++++++++++++++++++++------ > > > 1 files changed, 81 insertions(+), 11 deletions(-) > > > > > > diff --git a/hw/device-assignment.c b/hw/device-assignment.c > > > index 71685ee..9791ec9 100644 > > > --- a/hw/device-assignment.c > > > +++ b/hw/device-assignment.c > > > @@ -964,6 +964,11 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev) > > > } > > > } > > > > > > +static bool msix_masked(MSIXTableEntry *entry) > > > +{ > > > + return (entry->ctrl & cpu_to_le32(0x1)) != 0; > > > +} > > > + > > > static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) > > > { > > > AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev); > > > @@ -975,17 +980,19 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) > > > > > > /* Get the usable entry number for allocating */ > > > for (i = 0; i < adev->msix_max; i++, entry++) { > > > - /* Ignore unused entry even it's unmasked */ > > > - if (entry->data == 0) { > > > + if (msix_masked(entry)) { > > > continue; > > > } > > > entries_nr++; > > > } > > > > > > - if (entries_nr == 0) { > > > - fprintf(stderr, "MSI-X entry number is zero!\n"); > > > - return -EINVAL; > > > + DEBUG("MSI-X entries: %d\n", entries_nr); > > > + > > > + /* It's valid to enable MSI-X with all entries masked */ > > > + if (!entries_nr) { > > > + return 0; > > > } > > > + > > > msix_nr.assigned_dev_id = calc_assigned_dev_id(adev); > > > msix_nr.entry_nr = entries_nr; > > > r = kvm_assign_set_msix_nr(kvm_state, &msix_nr); > > > @@ -1003,7 +1010,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) > > > msix_entry.assigned_dev_id = msix_nr.assigned_dev_id; > > > entry = adev->msix_table; > > > for (i = 0; i < adev->msix_max; i++, entry++) { > > > - if (entry->data == 0) { > > > + if (msix_masked(entry)) { > > > continue; > > > } > > > > > > @@ -1075,9 +1082,12 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev) > > > perror("assigned_dev_update_msix_mmio"); > > > return; > > > } > > > - if (kvm_assign_irq(kvm_state, &assigned_irq_data) < 0) { > > > - perror("assigned_dev_enable_msix: assign irq"); > > > - return; > > > + > > > + if (assigned_dev->irq_entries_nr) { > > > + if (kvm_assign_irq(kvm_state, &assigned_irq_data) < 0) { > > > + perror("assigned_dev_enable_msix: assign irq"); > > > + return; > > > + } > > > } > > > assigned_dev->girq = -1; > > > assigned_dev->irq_requested_type = assigned_irq_data.flags; > > > @@ -1438,11 +1448,71 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr, > > > uint64_t val, unsigned size) > > > { > > > AssignedDevice *adev = opaque; > > > + PCIDevice *pdev = &adev->dev; > > > + uint16_t ctrl; > > > + MSIXTableEntry orig; > > > + int i = addr >> 4; > > > + > > > + if (i >= adev->msix_max) { > > > + return; /* Drop write */ > > > + } > > > > > > - DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%lx\n", > > > - addr, val); > > > + ctrl = pci_get_word(pdev->config + pdev->msix_cap + PCI_MSIX_FLAGS); > > > + > > > + DEBUG("write to MSI-X table offset 0x%lx, val 0x%lx\n", addr, val); > > > + > > > + if (ctrl & PCI_MSIX_FLAGS_ENABLE) { > > > + orig = adev->msix_table[i]; > > > + } > > > > > > memcpy((void *)((uint8_t *)adev->msix_table + addr), &val, size); > > > + > > > + if (ctrl & PCI_MSIX_FLAGS_ENABLE) { > > > + MSIXTableEntry *entry = &adev->msix_table[i]; > > > + > > > + if (!msix_masked(&orig) && msix_masked(entry)) { > > > + /* > > > + * Vector masked, disable it > > > + * > > > + * XXX It's not clear if we can or should actually attempt > > > + * to mask or disable the interrupt. KVM doesn't have > > > + * support for pending bits and kvm_assign_set_msix_entry > > > + * doesn't modify the device hardware mask. Interrupts > > > + * while masked are simply not injected to the guest, so > > > + * are lost. Can we get away with always injecting an > > > + * interrupt on unmask? > > > + */ > > > + } else if (msix_masked(&orig) && !msix_masked(entry)) { > > > + /* Vector unmasked */ > > > + if (i >= adev->irq_entries_nr || !adev->entry[i].type) { > > > + /* Previously unassigned vector, start from scratch */ > > > + assigned_dev_update_msix(pdev); > > > + return; > > > + } else { > > > + /* Update an existing, previously masked vector */ > > > + struct kvm_irq_routing_entry orig = adev->entry[i]; > > > + int ret; > > > + > > > + adev->entry[i].u.msi.address_lo = entry->addr_lo; > > > + adev->entry[i].u.msi.address_hi = entry->addr_hi; > > > + adev->entry[i].u.msi.data = entry->data; > > > + > > > + ret = kvm_update_routing_entry(&orig, &adev->entry[i]); > > > + if (ret) { > > > + fprintf(stderr, > > > + "Error updating irq routing entry (%d)\n", ret); > > > + return; > > > + } > > > + > > > + ret = kvm_commit_irq_routes(); > > > + if (ret) { > > > + fprintf(stderr, > > > + "Error committing irq routes (%d)\n", ret); > > > + return; > > > + } > > > + } > > > + } > > > + } > > > } > > > > > > static const MemoryRegionOps msix_mmio_ops = { > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html