Re: [PATCH v2 8/8] pci-assign: Update MSI-X config based on table writes

"Michael S. Tsirkin" <mst@xxxxxxxxxx> · Wed, 1 Feb 2012 17:13:07 +0200

On Wed, Feb 01, 2012 at 06:48:32AM -0700, Alex Williamson wrote:
> On Wed, 2012-02-01 at 09:22 +0200, Michael S. Tsirkin wrote:
> > On Tue, Jan 31, 2012 at 10:33:14PM -0700, Alex Williamson wrote:
> > > When a guest enables MSI-X in PCI configuration space we walk
> > > through the MSI-X vector table trying to guess what might get
> > > used.  We have to guess because we don't do anything with
> > > writes to the vector table, so we look for clues ahead of time
> > > to pre-enable the vectors we think will be used.  This means
> > > that instead of doing the sane thing and test the mask bit, we
> > > test whether the data field contains a non-zero value.  It's
> > > amazing this works as well as it does.
> > > 
> > > However, two key things missed by doing this is that we don't
> > > catch vector changes after enabling (ex. setting smp_affinity
> > > on an irq) and we don't support guests that don't touch the
> > > vector table prior to enabling the MSI-X capability (ex.
> > > freebsd).  This patch fixes both of these problems.
> > > 
> > > NB we're not actually masking vectors yet with this patch as
> > > it's unclear whether we really have the ability to do this
> > > without losing interrupts.
> > > 
> > > Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
> > 
> > Overall I think this is better than what we have now.
> > One question though: would it make sense to use msix mask notifiers
> > instead of parsing msix tables on your own?
> 
> Based on vfio, I think some minor changes would be necessary to the msix
> core to port to that infrastructure.  I don't think I'd want to use this
> code to push those changes since it likely won't get ported to qemu.git.
> The parsing is pretty trivial now that we're not counting bytes anyway.
> Thanks,
> 
> Alex

Well the notifiers are a small bit of code,
so I can see myself porting them forward even if
there are no users in qemu.git.
Let me know if you think it makes sense.

> 
> > > ---
> > > 
> > >  hw/device-assignment.c |   92 ++++++++++++++++++++++++++++++++++++++++++------
> > >  1 files changed, 81 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> > > index 71685ee..9791ec9 100644
> > > --- a/hw/device-assignment.c
> > > +++ b/hw/device-assignment.c
> > > @@ -964,6 +964,11 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
> > >      }
> > >  }
> > >  
> > > +static bool msix_masked(MSIXTableEntry *entry)
> > > +{
> > > +    return (entry->ctrl & cpu_to_le32(0x1)) != 0;
> > > +}
> > > +
> > >  static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
> > >  {
> > >      AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> > > @@ -975,17 +980,19 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
> > >  
> > >      /* Get the usable entry number for allocating */
> > >      for (i = 0; i < adev->msix_max; i++, entry++) {
> > > -        /* Ignore unused entry even it's unmasked */
> > > -        if (entry->data == 0) {
> > > +        if (msix_masked(entry)) {
> > >              continue;
> > >          }
> > >          entries_nr++;
> > >      }
> > >  
> > > -    if (entries_nr == 0) {
> > > -        fprintf(stderr, "MSI-X entry number is zero!\n");
> > > -        return -EINVAL;
> > > +    DEBUG("MSI-X entries: %d\n", entries_nr);
> > > +
> > > +    /* It's valid to enable MSI-X with all entries masked */
> > > +    if (!entries_nr) {
> > > +        return 0;
> > >      }
> > > +
> > >      msix_nr.assigned_dev_id = calc_assigned_dev_id(adev);
> > >      msix_nr.entry_nr = entries_nr;
> > >      r = kvm_assign_set_msix_nr(kvm_state, &msix_nr);
> > > @@ -1003,7 +1010,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
> > >      msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
> > >      entry = adev->msix_table;
> > >      for (i = 0; i < adev->msix_max; i++, entry++) {
> > > -        if (entry->data == 0) {
> > > +        if (msix_masked(entry)) {
> > >              continue;
> > >          }
> > >  
> > > @@ -1075,9 +1082,12 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev)
> > >              perror("assigned_dev_update_msix_mmio");
> > >              return;
> > >          }
> > > -        if (kvm_assign_irq(kvm_state, &assigned_irq_data) < 0) {
> > > -            perror("assigned_dev_enable_msix: assign irq");
> > > -            return;
> > > +
> > > +        if (assigned_dev->irq_entries_nr) {
> > > +            if (kvm_assign_irq(kvm_state, &assigned_irq_data) < 0) {
> > > +                perror("assigned_dev_enable_msix: assign irq");
> > > +                return;
> > > +            }
> > >          }
> > >          assigned_dev->girq = -1;
> > >          assigned_dev->irq_requested_type = assigned_irq_data.flags;
> > > @@ -1438,11 +1448,71 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
> > >                              uint64_t val, unsigned size)
> > >  {
> > >      AssignedDevice *adev = opaque;
> > > +    PCIDevice *pdev = &adev->dev;
> > > +    uint16_t ctrl;
> > > +    MSIXTableEntry orig;
> > > +    int i = addr >> 4;
> > > +
> > > +    if (i >= adev->msix_max) {
> > > +        return; /* Drop write */
> > > +    }
> > >  
> > > -    DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%lx\n",
> > > -          addr, val);
> > > +    ctrl = pci_get_word(pdev->config + pdev->msix_cap + PCI_MSIX_FLAGS);
> > > +
> > > +    DEBUG("write to MSI-X table offset 0x%lx, val 0x%lx\n", addr, val);
> > > +
> > > +    if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> > > +        orig = adev->msix_table[i];
> > > +    }
> > >  
> > >      memcpy((void *)((uint8_t *)adev->msix_table + addr), &val, size);
> > > +
> > > +    if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> > > +        MSIXTableEntry *entry = &adev->msix_table[i];
> > > +
> > > +        if (!msix_masked(&orig) && msix_masked(entry)) {
> > > +            /*
> > > +             * Vector masked, disable it
> > > +             *
> > > +             * XXX It's not clear if we can or should actually attempt
> > > +             * to mask or disable the interrupt.  KVM doesn't have
> > > +             * support for pending bits and kvm_assign_set_msix_entry
> > > +             * doesn't modify the device hardware mask.  Interrupts
> > > +             * while masked are simply not injected to the guest, so
> > > +             * are lost.  Can we get away with always injecting an
> > > +             * interrupt on unmask?
> > > +             */
> > > +        } else if (msix_masked(&orig) && !msix_masked(entry)) {
> > > +            /* Vector unmasked */
> > > +            if (i >= adev->irq_entries_nr || !adev->entry[i].type) {
> > > +                /* Previously unassigned vector, start from scratch */
> > > +                assigned_dev_update_msix(pdev);
> > > +                return;
> > > +            } else {
> > > +                /* Update an existing, previously masked vector */
> > > +                struct kvm_irq_routing_entry orig = adev->entry[i];
> > > +                int ret;
> > > +
> > > +                adev->entry[i].u.msi.address_lo = entry->addr_lo;
> > > +                adev->entry[i].u.msi.address_hi = entry->addr_hi;
> > > +                adev->entry[i].u.msi.data = entry->data;
> > > +
> > > +                ret = kvm_update_routing_entry(&orig, &adev->entry[i]);
> > > +                if (ret) {
> > > +                    fprintf(stderr,
> > > +                            "Error updating irq routing entry (%d)\n", ret);
> > > +                    return;
> > > +                }
> > > +
> > > +                ret = kvm_commit_irq_routes();
> > > +                if (ret) {
> > > +                    fprintf(stderr,
> > > +                            "Error committing irq routes (%d)\n", ret);
> > > +                    return;
> > > +                }
> > > +            }
> > > +        }
> > > +    }
> > >  }
> > >  
> > >  static const MemoryRegionOps msix_mmio_ops = {
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html