Re: [RFC][PATCH 28/45] qemu-kvm: msix: Drop tracking of used vectors

"Michael S. Tsirkin" <mst@xxxxxxxxxx> · Mon, 17 Oct 2011 17:48:53 +0200

On Mon, Oct 17, 2011 at 11:28:02AM +0200, Jan Kiszka wrote:
> This optimization was only required to keep KVM route usage low. Now
> that we solve that problem via lazy updates, we can drop the field. We
> still need interfaces to clear pending vectors, though (and we have to
> make use of them more broadly - but that's unrelated to this patch).
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>

Lazy updates should be an implementation detail.
IMO resource tracking of vectors makes sense
as an API. Making devices deal with pending
vectors as a concept, IMO, does not.

> ---
>  hw/ivshmem.c    |   16 ++-----------
>  hw/msix.c       |   62 +++++++++++-------------------------------------------
>  hw/msix.h       |    5 +--
>  hw/pci.h        |    2 -
>  hw/virtio-pci.c |   20 +++++++----------
>  5 files changed, 26 insertions(+), 79 deletions(-)
> 
> diff --git a/hw/ivshmem.c b/hw/ivshmem.c
> index 242fbea..a402c98 100644
> --- a/hw/ivshmem.c
> +++ b/hw/ivshmem.c
> @@ -535,10 +535,8 @@ static uint64_t ivshmem_get_size(IVShmemState * s) {
>      return value;
>  }
>  
> -static void ivshmem_setup_msi(IVShmemState * s) {
> -
> -    int i;
> -
> +static void ivshmem_setup_msi(IVShmemState *s)
> +{
>      /* allocate the MSI-X vectors */
>  
>      memory_region_init(&s->msix_bar, "ivshmem-msix", 4096);
> @@ -551,11 +549,6 @@ static void ivshmem_setup_msi(IVShmemState * s) {
>          exit(1);
>      }
>  
> -    /* 'activate' the vectors */
> -    for (i = 0; i < s->vectors; i++) {
> -        msix_vector_use(&s->dev, i);
> -    }
> -
>      /* allocate Qemu char devices for receiving interrupts */
>      s->eventfd_table = g_malloc0(s->vectors * sizeof(EventfdEntry));
>  }
> @@ -581,7 +574,7 @@ static int ivshmem_load(QEMUFile* f, void *opaque, int version_id)
>      IVSHMEM_DPRINTF("ivshmem_load\n");
>  
>      IVShmemState *proxy = opaque;
> -    int ret, i;
> +    int ret;
>  
>      if (version_id > 0) {
>          return -EINVAL;
> @@ -599,9 +592,6 @@ static int ivshmem_load(QEMUFile* f, void *opaque, int version_id)
>  
>      if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) {
>          msix_load(&proxy->dev, f);
> -        for (i = 0; i < proxy->vectors; i++) {
> -            msix_vector_use(&proxy->dev, i);
> -        }
>      } else {
>          proxy->intrstatus = qemu_get_be32(f);
>          proxy->intrmask = qemu_get_be32(f);
> diff --git a/hw/msix.c b/hw/msix.c
> index ce3375a..f1b97b5 100644
> --- a/hw/msix.c
> +++ b/hw/msix.c
> @@ -292,9 +292,6 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
>      if (nentries > MSIX_MAX_ENTRIES)
>          return -EINVAL;
>  
> -    dev->msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES *
> -                                        sizeof *dev->msix_entry_used);
> -
>      dev->msix_table_page = g_malloc0(MSIX_PAGE_SIZE);
>      msix_mask_all(dev, nentries);
>  
> @@ -317,21 +314,9 @@ err_config:
>      memory_region_destroy(&dev->msix_mmio);
>      g_free(dev->msix_table_page);
>      dev->msix_table_page = NULL;
> -    g_free(dev->msix_entry_used);
> -    dev->msix_entry_used = NULL;
>      return ret;
>  }
>  
> -static void msix_free_irq_entries(PCIDevice *dev)
> -{
> -    int vector;
> -
> -    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
> -        dev->msix_entry_used[vector] = 0;
> -        msix_clr_pending(dev, vector);
> -    }
> -}
> -
>  /* Clean up resources for the device. */
>  int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
>  {
> @@ -340,14 +325,11 @@ int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
>      }
>      pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
>      dev->msix_cap = 0;
> -    msix_free_irq_entries(dev);
>      dev->msix_entries_nr = 0;
>      memory_region_del_subregion(bar, &dev->msix_mmio);
>      memory_region_destroy(&dev->msix_mmio);
>      g_free(dev->msix_table_page);
>      dev->msix_table_page = NULL;
> -    g_free(dev->msix_entry_used);
> -    dev->msix_entry_used = NULL;
>  
>      kvm_msix_free(dev);
>      g_free(dev->msix_cache);
> @@ -376,7 +358,6 @@ void msix_load(PCIDevice *dev, QEMUFile *f)
>          return;
>      }
>  
> -    msix_free_irq_entries(dev);
>      qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
>      qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
>  }
> @@ -407,7 +388,7 @@ void msix_notify(PCIDevice *dev, unsigned vector)
>  {
>      MSIMessage msg;
>  
> -    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
> +    if (vector >= dev->msix_entries_nr)
>          return;
>      if (msix_is_masked(dev, vector)) {
>          msix_set_pending(dev, vector);
> @@ -424,48 +405,31 @@ void msix_reset(PCIDevice *dev)
>      if (!msix_present(dev)) {
>          return;
>      }
> -    msix_free_irq_entries(dev);
> +    msix_clear_all_vectors(dev);
>      dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
>  	    ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
>      memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE);
>      msix_mask_all(dev, dev->msix_entries_nr);
>  }
>  
> -/* PCI spec suggests that devices make it possible for software to configure
> - * less vectors than supported by the device, but does not specify a standard
> - * mechanism for devices to do so.
> - *
> - * We support this by asking devices to declare vectors software is going to
> - * actually use, and checking this on the notification path. Devices that
> - * don't want to follow the spec suggestion can declare all vectors as used. */
> -
> -/* Mark vector as used. */
> -int msix_vector_use(PCIDevice *dev, unsigned vector)
> +/* Clear pending vector. */
> +void msix_clear_vector(PCIDevice *dev, unsigned vector)
>  {
> -    if (vector >= dev->msix_entries_nr)
> -        return -EINVAL;
> -    ++dev->msix_entry_used[vector];
> -    return 0;
> -}
> -
> -/* Mark vector as unused. */
> -void msix_vector_unuse(PCIDevice *dev, unsigned vector)
> -{
> -    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
> -        return;
> -    }
> -    if (--dev->msix_entry_used[vector]) {
> -        return;
> +    if (msix_present(dev) && vector < dev->msix_entries_nr) {
> +        msix_clr_pending(dev, vector);
>      }
> -    msix_clr_pending(dev, vector);
>  }
>  
> -void msix_unuse_all_vectors(PCIDevice *dev)
> +void msix_clear_all_vectors(PCIDevice *dev)
>  {
> +    unsigned int vector;
> +
>      if (!msix_present(dev)) {
>          return;
>      }
> -    msix_free_irq_entries(dev);
> +    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
> +        msix_clr_pending(dev, vector);
> +    }
>  }
>  
>  /* Invoke the notifier if vector entry is used and unmasked. */
> @@ -476,7 +440,7 @@ msix_notify_if_unmasked(PCIDevice *dev, unsigned int vector, bool masked)
>  
>      assert(dev->msix_vector_config_notifier);
>  
> -    if (!dev->msix_entry_used[vector] || msix_is_masked(dev, vector)) {
> +    if (msix_is_masked(dev, vector)) {
>          return 0;
>      }
>      msix_message_from_vector(dev, vector, &msg);
> diff --git a/hw/msix.h b/hw/msix.h
> index 978f417..9cd54cf 100644
> --- a/hw/msix.h
> +++ b/hw/msix.h
> @@ -21,9 +21,8 @@ int msix_present(PCIDevice *dev);
>  
>  uint32_t msix_bar_size(PCIDevice *dev);
>  
> -int msix_vector_use(PCIDevice *dev, unsigned vector);
> -void msix_vector_unuse(PCIDevice *dev, unsigned vector);
> -void msix_unuse_all_vectors(PCIDevice *dev);
> +void msix_clear_vector(PCIDevice *dev, unsigned vector);
> +void msix_clear_all_vectors(PCIDevice *dev);
>  
>  void msix_notify(PCIDevice *dev, unsigned vector);
>  
> diff --git a/hw/pci.h b/hw/pci.h
> index d7a652e..5cf9a16 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -178,8 +178,6 @@ struct PCIDevice {
>      uint8_t *msix_table_page;
>      /* MMIO index used to map MSIX table and pending bit entries. */
>      MemoryRegion msix_mmio;
> -    /* Reference-count for entries actually in use by driver. */
> -    unsigned *msix_entry_used;
>      /* Region including the MSI-X table */
>      uint32_t msix_bar_size;
>      /* Version id needed for VMState */
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index 85d6771..5004d7d 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -136,9 +136,6 @@ static int virtio_pci_load_config(void * opaque, QEMUFile *f)
>      } else {
>          proxy->vdev->config_vector = VIRTIO_NO_VECTOR;
>      }
> -    if (proxy->vdev->config_vector != VIRTIO_NO_VECTOR) {
> -        return msix_vector_use(&proxy->pci_dev, proxy->vdev->config_vector);
> -    }
>      return 0;
>  }
>  
> @@ -152,9 +149,6 @@ static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
>          vector = VIRTIO_NO_VECTOR;
>      }
>      virtio_queue_set_vector(proxy->vdev, n, vector);
> -    if (vector != VIRTIO_NO_VECTOR) {
> -        return msix_vector_use(&proxy->pci_dev, vector);
> -    }
>      return 0;
>  }
>  
> @@ -304,7 +298,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
>          if (pa == 0) {
>              virtio_pci_stop_ioeventfd(proxy);
>              virtio_reset(proxy->vdev);
> -            msix_unuse_all_vectors(&proxy->pci_dev);
> +            msix_clear_all_vectors(&proxy->pci_dev);
>          }
>          else
>              virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
> @@ -331,7 +325,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
>  
>          if (vdev->status == 0) {
>              virtio_reset(proxy->vdev);
> -            msix_unuse_all_vectors(&proxy->pci_dev);
> +            msix_clear_all_vectors(&proxy->pci_dev);
>          }
>  
>          /* Linux before 2.6.34 sets the device as OK without enabling
> @@ -343,18 +337,20 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
>          }
>          break;
>      case VIRTIO_MSI_CONFIG_VECTOR:
> -        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
> +        msix_clear_vector(&proxy->pci_dev, vdev->config_vector);
>          /* Make it possible for guest to discover an error took place. */
> -        if (msix_vector_use(&proxy->pci_dev, val) < 0)
> +        if (val >= vdev->nvectors) {
>              val = VIRTIO_NO_VECTOR;
> +        }
>          vdev->config_vector = val;
>          break;
>      case VIRTIO_MSI_QUEUE_VECTOR:
> -        msix_vector_unuse(&proxy->pci_dev,
> +        msix_clear_vector(&proxy->pci_dev,
>                            virtio_queue_vector(vdev, vdev->queue_sel));
>          /* Make it possible for guest to discover an error took place. */
> -        if (msix_vector_use(&proxy->pci_dev, val) < 0)
> +        if (val >= vdev->nvectors) {
>              val = VIRTIO_NO_VECTOR;
> +        }
>          virtio_queue_set_vector(vdev, vdev->queue_sel, val);
>          break;
>      default:
> -- 
> 1.7.3.4
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html