[PATCH v3 2/3] virtio-pci: Use ioeventfd for virtqueue notify

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Virtqueue notify is currently handled synchronously in userspace virtio.  This
prevents the vcpu from executing guest code while hardware emulation code
handles the notify.

On systems that support KVM, the ioeventfd mechanism can be used to make
virtqueue notify a lightweight exit by deferring hardware emulation to the
iothread and allowing the VM to continue execution.  This model is similar to
how vhost receives virtqueue notifies.

The result of this change is improved performance for userspace virtio devices.
Virtio-blk throughput increases especially for multithreaded scenarios and
virtio-net transmit throughput increases substantially.

Some virtio devices are known to have guest drivers which expect a notify to be
processed synchronously and spin waiting for completion.  Only enable ioeventfd
for virtio-blk and virtio-net for now.

Care must be taken not to interfere with vhost-net, which already uses
ioeventfd host notifiers.  The following list shows the behavior implemented in
this patch and is designed to take vhost-net into account:

 * VIRTIO_CONFIG_S_DRIVER_OK -> assign host notifiers, qemu_set_fd_handler(virtio_pci_host_notifier_read)
 * !VIRTIO_CONFIG_S_DRIVER_OK -> qemu_set_fd_handler(NULL), deassign host notifiers
 * virtio_pci_set_host_notifier(true) -> qemu_set_fd_handler(NULL)
 * virtio_pci_set_host_notifier(false) -> qemu_set_fd_handler(virtio_pci_host_notifier_read)

Signed-off-by: Stefan Hajnoczi <stefanha@xxxxxxxxxxxxxxxxxx>
---
 hw/virtio-pci.c |  152 ++++++++++++++++++++++++++++++++++++++++++++++--------
 hw/virtio.c     |   14 ++++-
 hw/virtio.h     |   13 +++++
 3 files changed, 153 insertions(+), 26 deletions(-)

Now toggles host notifiers based on VIRTIO_CONFIG_S_DRIVER_OK status changes.
The cleanest way I could see was to introduce pre and a post set_status()
callbacks.  They allow a binding to hook status changes, including the status
change from virtio_reset().

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 549118d..117e855 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -83,6 +83,11 @@
 /* Flags track per-device state like workarounds for quirks in older guests. */
 #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG  (1 << 0)
 
+/* Performance improves when virtqueue kick processing is decoupled from the
+ * vcpu thread using ioeventfd for some devices. */
+#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1
+#define VIRTIO_PCI_FLAG_USE_IOEVENTFD   (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
+
 /* QEMU doesn't strictly need write barriers since everything runs in
  * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
  * KVM or if kqemu gets SMP support.
@@ -179,12 +184,125 @@ static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
     return 0;
 }
 
+static int virtio_pci_set_host_notifier_ioeventfd(VirtIOPCIProxy *proxy,
+                                                  int n, bool assign)
+{
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
+    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
+    int r;
+    if (assign) {
+        r = event_notifier_init(notifier, 1);
+        if (r < 0) {
+            return r;
+        }
+        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
+                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
+                                       n, assign);
+        if (r < 0) {
+            event_notifier_cleanup(notifier);
+        }
+    } else {
+        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
+                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
+                                       n, assign);
+        if (r < 0) {
+            return r;
+        }
+        event_notifier_cleanup(notifier);
+    }
+    return r;
+}
+
+static void virtio_pci_host_notifier_read(void *opaque)
+{
+    VirtQueue *vq = opaque;
+    EventNotifier *n = virtio_queue_get_host_notifier(vq);
+    if (event_notifier_test_and_clear(n)) {
+        virtio_queue_notify_vq(vq);
+    }
+}
+
+static void virtio_pci_set_host_notifier_fd_handler(VirtIOPCIProxy *proxy,
+                                                    int n, bool assign)
+{
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
+    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
+    if (assign) {
+        qemu_set_fd_handler(event_notifier_get_fd(notifier),
+                            virtio_pci_host_notifier_read, NULL, vq);
+    } else {
+        qemu_set_fd_handler(event_notifier_get_fd(notifier),
+                            NULL, NULL, NULL);
+    }
+}
+
+static int virtio_pci_set_host_notifiers(VirtIOPCIProxy *proxy, bool assign)
+{
+    int n, r;
+
+    for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
+        if (!virtio_queue_get_num(proxy->vdev, n)) {
+            continue;
+        }
+
+        if (assign) {
+            r = virtio_pci_set_host_notifier_ioeventfd(proxy, n, true);
+            if (r < 0) {
+                goto assign_error;
+            }
+
+            virtio_pci_set_host_notifier_fd_handler(proxy, n, true);
+        } else {
+            virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
+            virtio_pci_set_host_notifier_ioeventfd(proxy, n, false);
+        }
+    }
+    return 0;
+
+assign_error:
+    while (--n >= 0) {
+        virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
+        virtio_pci_set_host_notifier_ioeventfd(proxy, n, false);
+    }
+    return r;
+}
+
+static void virtio_pci_pre_set_status(void *opaque, uint8_t oldval,
+                                      uint8_t newval)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    bool changed = (oldval ^ newval) & VIRTIO_CONFIG_S_DRIVER_OK;
+    bool driver_ok = newval & VIRTIO_CONFIG_S_DRIVER_OK;
+
+    /* Bring up host notifiers before virtio device may request them */
+    if (changed && driver_ok &&
+        (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD)) {
+        if (virtio_pci_set_host_notifiers(proxy, true) < 0) {
+            proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
+        }
+    }
+}
+
+static void virtio_pci_post_set_status(void *opaque, uint8_t oldval,
+                                       uint8_t newval)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    bool changed = (oldval ^ newval) & VIRTIO_CONFIG_S_DRIVER_OK;
+    bool driver_ok = newval & VIRTIO_CONFIG_S_DRIVER_OK;
+
+    /* Take down host notifiers after virtio device releases them */
+    if (changed && !driver_ok &&
+        (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD)) {
+        virtio_pci_set_host_notifiers(proxy, false);
+    }
+}
+
 static void virtio_pci_reset(DeviceState *d)
 {
     VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev);
     virtio_reset(proxy->vdev);
     msix_reset(&proxy->pci_dev);
-    proxy->flags = 0;
+    proxy->flags &= ~VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
 }
 
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
@@ -480,30 +598,12 @@ assign_error:
 static int virtio_pci_set_host_notifier(void *opaque, int n, bool assign)
 {
     VirtIOPCIProxy *proxy = opaque;
-    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
-    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
-    int r;
-    if (assign) {
-        r = event_notifier_init(notifier, 1);
-        if (r < 0) {
-            return r;
-        }
-        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
-                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
-                                       n, assign);
-        if (r < 0) {
-            event_notifier_cleanup(notifier);
-        }
+    if (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) {
+        virtio_pci_set_host_notifier_fd_handler(proxy, n, !assign);
+        return 0;
     } else {
-        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
-                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
-                                       n, assign);
-        if (r < 0) {
-            return r;
-        }
-        event_notifier_cleanup(notifier);
+        return virtio_pci_set_host_notifier_ioeventfd(proxy, n, assign);
     }
-    return r;
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
@@ -515,6 +615,8 @@ static const VirtIOBindings virtio_pci_bindings = {
     .get_features = virtio_pci_get_features,
     .set_host_notifier = virtio_pci_set_host_notifier,
     .set_guest_notifiers = virtio_pci_set_guest_notifiers,
+    .pre_set_status = virtio_pci_pre_set_status,
+    .post_set_status = virtio_pci_post_set_status,
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
@@ -702,6 +804,8 @@ static PCIDeviceInfo virtio_info[] = {
         .qdev.props = (Property[]) {
             DEFINE_PROP_HEX32("class", VirtIOPCIProxy, class_code, 0),
             DEFINE_BLOCK_PROPERTIES(VirtIOPCIProxy, block),
+            DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                            VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
             DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
             DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
             DEFINE_PROP_END_OF_LIST(),
@@ -714,6 +818,8 @@ static PCIDeviceInfo virtio_info[] = {
         .exit       = virtio_net_exit_pci,
         .romfile    = "pxe-virtio.bin",
         .qdev.props = (Property[]) {
+            DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                            VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
             DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
             DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
             DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
diff --git a/hw/virtio.c b/hw/virtio.c
index a2a657e..1f83b2c 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -574,11 +574,19 @@ int virtio_queue_get_num(VirtIODevice *vdev, int n)
     return vdev->vq[n].vring.num;
 }
 
+void virtio_queue_notify_vq(VirtQueue *vq)
+{
+    if (vq->vring.desc) {
+        VirtIODevice *vdev = vq->vdev;
+        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
+        vq->handle_output(vdev, vq);
+    }
+}
+
 void virtio_queue_notify(VirtIODevice *vdev, int n)
 {
-    if (n < VIRTIO_PCI_QUEUE_MAX && vdev->vq[n].vring.desc) {
-        trace_virtio_queue_notify(vdev, n, &vdev->vq[n]);
-        vdev->vq[n].handle_output(vdev, &vdev->vq[n]);
+    if (n < VIRTIO_PCI_QUEUE_MAX) {
+        virtio_queue_notify_vq(&vdev->vq[n]);
     }
 }
 
diff --git a/hw/virtio.h b/hw/virtio.h
index 02fa312..5767914 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -95,6 +95,8 @@ typedef struct {
     unsigned (*get_features)(void * opaque);
     int (*set_guest_notifiers)(void * opaque, bool assigned);
     int (*set_host_notifier)(void * opaque, int n, bool assigned);
+    void (*pre_set_status)(void * opaque, uint8_t oldval, uint8_t newval);
+    void (*post_set_status)(void * opaque, uint8_t oldval, uint8_t newval);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 64
@@ -127,10 +129,20 @@ struct VirtIODevice
 
 static inline void virtio_set_status(VirtIODevice *vdev, uint8_t val)
 {
+    uint8_t old = vdev->status;
+
+    if (vdev->binding->pre_set_status) {
+        vdev->binding->pre_set_status(vdev->binding_opaque, old, val);
+    }
+
     if (vdev->set_status) {
         vdev->set_status(vdev, val);
     }
     vdev->status = val;
+
+    if (vdev->binding->post_set_status) {
+        vdev->binding->post_set_status(vdev->binding_opaque, old, val);
+    }
 }
 
 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
@@ -219,5 +231,6 @@ void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx);
 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
+void virtio_queue_notify_vq(VirtQueue *vq);
 void virtio_irq(VirtQueue *vq);
 #endif
-- 
1.7.2.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux