[RFC][PATCH 28/45] qemu-kvm: msix: Drop tracking of used vectors

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This optimization was only required to keep KVM route usage low. Now
that we solve that problem via lazy updates, we can drop the field. We
still need interfaces to clear pending vectors, though (and we have to
make use of them more broadly - but that's unrelated to this patch).

Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
---
 hw/ivshmem.c    |   16 ++-----------
 hw/msix.c       |   62 +++++++++++-------------------------------------------
 hw/msix.h       |    5 +--
 hw/pci.h        |    2 -
 hw/virtio-pci.c |   20 +++++++----------
 5 files changed, 26 insertions(+), 79 deletions(-)

diff --git a/hw/ivshmem.c b/hw/ivshmem.c
index 242fbea..a402c98 100644
--- a/hw/ivshmem.c
+++ b/hw/ivshmem.c
@@ -535,10 +535,8 @@ static uint64_t ivshmem_get_size(IVShmemState * s) {
     return value;
 }
 
-static void ivshmem_setup_msi(IVShmemState * s) {
-
-    int i;
-
+static void ivshmem_setup_msi(IVShmemState *s)
+{
     /* allocate the MSI-X vectors */
 
     memory_region_init(&s->msix_bar, "ivshmem-msix", 4096);
@@ -551,11 +549,6 @@ static void ivshmem_setup_msi(IVShmemState * s) {
         exit(1);
     }
 
-    /* 'activate' the vectors */
-    for (i = 0; i < s->vectors; i++) {
-        msix_vector_use(&s->dev, i);
-    }
-
     /* allocate Qemu char devices for receiving interrupts */
     s->eventfd_table = g_malloc0(s->vectors * sizeof(EventfdEntry));
 }
@@ -581,7 +574,7 @@ static int ivshmem_load(QEMUFile* f, void *opaque, int version_id)
     IVSHMEM_DPRINTF("ivshmem_load\n");
 
     IVShmemState *proxy = opaque;
-    int ret, i;
+    int ret;
 
     if (version_id > 0) {
         return -EINVAL;
@@ -599,9 +592,6 @@ static int ivshmem_load(QEMUFile* f, void *opaque, int version_id)
 
     if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) {
         msix_load(&proxy->dev, f);
-        for (i = 0; i < proxy->vectors; i++) {
-            msix_vector_use(&proxy->dev, i);
-        }
     } else {
         proxy->intrstatus = qemu_get_be32(f);
         proxy->intrmask = qemu_get_be32(f);
diff --git a/hw/msix.c b/hw/msix.c
index ce3375a..f1b97b5 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -292,9 +292,6 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
     if (nentries > MSIX_MAX_ENTRIES)
         return -EINVAL;
 
-    dev->msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES *
-                                        sizeof *dev->msix_entry_used);
-
     dev->msix_table_page = g_malloc0(MSIX_PAGE_SIZE);
     msix_mask_all(dev, nentries);
 
@@ -317,21 +314,9 @@ err_config:
     memory_region_destroy(&dev->msix_mmio);
     g_free(dev->msix_table_page);
     dev->msix_table_page = NULL;
-    g_free(dev->msix_entry_used);
-    dev->msix_entry_used = NULL;
     return ret;
 }
 
-static void msix_free_irq_entries(PCIDevice *dev)
-{
-    int vector;
-
-    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
-        dev->msix_entry_used[vector] = 0;
-        msix_clr_pending(dev, vector);
-    }
-}
-
 /* Clean up resources for the device. */
 int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
 {
@@ -340,14 +325,11 @@ int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
     }
     pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
     dev->msix_cap = 0;
-    msix_free_irq_entries(dev);
     dev->msix_entries_nr = 0;
     memory_region_del_subregion(bar, &dev->msix_mmio);
     memory_region_destroy(&dev->msix_mmio);
     g_free(dev->msix_table_page);
     dev->msix_table_page = NULL;
-    g_free(dev->msix_entry_used);
-    dev->msix_entry_used = NULL;
 
     kvm_msix_free(dev);
     g_free(dev->msix_cache);
@@ -376,7 +358,6 @@ void msix_load(PCIDevice *dev, QEMUFile *f)
         return;
     }
 
-    msix_free_irq_entries(dev);
     qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
     qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
 }
@@ -407,7 +388,7 @@ void msix_notify(PCIDevice *dev, unsigned vector)
 {
     MSIMessage msg;
 
-    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
+    if (vector >= dev->msix_entries_nr)
         return;
     if (msix_is_masked(dev, vector)) {
         msix_set_pending(dev, vector);
@@ -424,48 +405,31 @@ void msix_reset(PCIDevice *dev)
     if (!msix_present(dev)) {
         return;
     }
-    msix_free_irq_entries(dev);
+    msix_clear_all_vectors(dev);
     dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
 	    ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
     memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE);
     msix_mask_all(dev, dev->msix_entries_nr);
 }
 
-/* PCI spec suggests that devices make it possible for software to configure
- * less vectors than supported by the device, but does not specify a standard
- * mechanism for devices to do so.
- *
- * We support this by asking devices to declare vectors software is going to
- * actually use, and checking this on the notification path. Devices that
- * don't want to follow the spec suggestion can declare all vectors as used. */
-
-/* Mark vector as used. */
-int msix_vector_use(PCIDevice *dev, unsigned vector)
+/* Clear pending vector. */
+void msix_clear_vector(PCIDevice *dev, unsigned vector)
 {
-    if (vector >= dev->msix_entries_nr)
-        return -EINVAL;
-    ++dev->msix_entry_used[vector];
-    return 0;
-}
-
-/* Mark vector as unused. */
-void msix_vector_unuse(PCIDevice *dev, unsigned vector)
-{
-    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
-        return;
-    }
-    if (--dev->msix_entry_used[vector]) {
-        return;
+    if (msix_present(dev) && vector < dev->msix_entries_nr) {
+        msix_clr_pending(dev, vector);
     }
-    msix_clr_pending(dev, vector);
 }
 
-void msix_unuse_all_vectors(PCIDevice *dev)
+void msix_clear_all_vectors(PCIDevice *dev)
 {
+    unsigned int vector;
+
     if (!msix_present(dev)) {
         return;
     }
-    msix_free_irq_entries(dev);
+    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
+        msix_clr_pending(dev, vector);
+    }
 }
 
 /* Invoke the notifier if vector entry is used and unmasked. */
@@ -476,7 +440,7 @@ msix_notify_if_unmasked(PCIDevice *dev, unsigned int vector, bool masked)
 
     assert(dev->msix_vector_config_notifier);
 
-    if (!dev->msix_entry_used[vector] || msix_is_masked(dev, vector)) {
+    if (msix_is_masked(dev, vector)) {
         return 0;
     }
     msix_message_from_vector(dev, vector, &msg);
diff --git a/hw/msix.h b/hw/msix.h
index 978f417..9cd54cf 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -21,9 +21,8 @@ int msix_present(PCIDevice *dev);
 
 uint32_t msix_bar_size(PCIDevice *dev);
 
-int msix_vector_use(PCIDevice *dev, unsigned vector);
-void msix_vector_unuse(PCIDevice *dev, unsigned vector);
-void msix_unuse_all_vectors(PCIDevice *dev);
+void msix_clear_vector(PCIDevice *dev, unsigned vector);
+void msix_clear_all_vectors(PCIDevice *dev);
 
 void msix_notify(PCIDevice *dev, unsigned vector);
 
diff --git a/hw/pci.h b/hw/pci.h
index d7a652e..5cf9a16 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -178,8 +178,6 @@ struct PCIDevice {
     uint8_t *msix_table_page;
     /* MMIO index used to map MSIX table and pending bit entries. */
     MemoryRegion msix_mmio;
-    /* Reference-count for entries actually in use by driver. */
-    unsigned *msix_entry_used;
     /* Region including the MSI-X table */
     uint32_t msix_bar_size;
     /* Version id needed for VMState */
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 85d6771..5004d7d 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -136,9 +136,6 @@ static int virtio_pci_load_config(void * opaque, QEMUFile *f)
     } else {
         proxy->vdev->config_vector = VIRTIO_NO_VECTOR;
     }
-    if (proxy->vdev->config_vector != VIRTIO_NO_VECTOR) {
-        return msix_vector_use(&proxy->pci_dev, proxy->vdev->config_vector);
-    }
     return 0;
 }
 
@@ -152,9 +149,6 @@ static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
         vector = VIRTIO_NO_VECTOR;
     }
     virtio_queue_set_vector(proxy->vdev, n, vector);
-    if (vector != VIRTIO_NO_VECTOR) {
-        return msix_vector_use(&proxy->pci_dev, vector);
-    }
     return 0;
 }
 
@@ -304,7 +298,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         if (pa == 0) {
             virtio_pci_stop_ioeventfd(proxy);
             virtio_reset(proxy->vdev);
-            msix_unuse_all_vectors(&proxy->pci_dev);
+            msix_clear_all_vectors(&proxy->pci_dev);
         }
         else
             virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
@@ -331,7 +325,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 
         if (vdev->status == 0) {
             virtio_reset(proxy->vdev);
-            msix_unuse_all_vectors(&proxy->pci_dev);
+            msix_clear_all_vectors(&proxy->pci_dev);
         }
 
         /* Linux before 2.6.34 sets the device as OK without enabling
@@ -343,18 +337,20 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         }
         break;
     case VIRTIO_MSI_CONFIG_VECTOR:
-        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+        msix_clear_vector(&proxy->pci_dev, vdev->config_vector);
         /* Make it possible for guest to discover an error took place. */
-        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+        if (val >= vdev->nvectors) {
             val = VIRTIO_NO_VECTOR;
+        }
         vdev->config_vector = val;
         break;
     case VIRTIO_MSI_QUEUE_VECTOR:
-        msix_vector_unuse(&proxy->pci_dev,
+        msix_clear_vector(&proxy->pci_dev,
                           virtio_queue_vector(vdev, vdev->queue_sel));
         /* Make it possible for guest to discover an error took place. */
-        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+        if (val >= vdev->nvectors) {
             val = VIRTIO_NO_VECTOR;
+        }
         virtio_queue_set_vector(vdev, vdev->queue_sel, val);
         break;
     default:
-- 
1.7.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux