Re: [PATCH v3 2/3] virtio-pci: Use ioeventfd for virtqueue notify

On Fri, Nov 12, 2010 at 01:24:28PM +0000, Stefan Hajnoczi wrote:
> Virtqueue notify is currently handled synchronously in userspace virtio.  This
> prevents the vcpu from executing guest code while hardware emulation code
> handles the notify.
> 
> On systems that support KVM, the ioeventfd mechanism can be used to make
> virtqueue notify a lightweight exit by deferring hardware emulation to the
> iothread and allowing the VM to continue execution.  This model is similar to
> how vhost receives virtqueue notifies.
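
For anyone new to the mechanism: the guest's queue kick is nothing more
than a 16-bit pio write of the queue index to the notify register.  From
memory, the Linux driver side looks roughly like this (names approximate):

    iowrite16(vq_index, ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);

Without ioeventfd that write traps all the way out to qemu and the vcpu
sits in the exit until the device model returns.  With an ioeventfd bound
to (ioaddr + VIRTIO_PCI_QUEUE_NOTIFY, vq_index), KVM completes the write
in-kernel by signalling the eventfd, and the iothread picks up the actual
virtqueue processing.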
> 
> The result of this change is improved performance for userspace virtio devices.
> Virtio-blk throughput increases especially for multithreaded scenarios and
> virtio-net transmit throughput increases substantially.
> 
> Some virtio devices are known to have guest drivers which expect a notify to be
> processed synchronously and spin waiting for completion.  Only enable ioeventfd
> for virtio-blk and virtio-net for now.
> 
> Care must be taken not to interfere with vhost-net, which already uses
> ioeventfd host notifiers.  The following list shows the behavior implemented in
> this patch and is designed to take vhost-net into account:
> 
>  * VIRTIO_CONFIG_S_DRIVER_OK -> assign host notifiers, qemu_set_fd_handler(virtio_pci_host_notifier_read)
>  * !VIRTIO_CONFIG_S_DRIVER_OK -> qemu_set_fd_handler(NULL), deassign host notifiers
>  * virtio_pci_set_host_notifier(true) -> qemu_set_fd_handler(NULL)
>  * virtio_pci_set_host_notifier(false) -> qemu_set_fd_handler(virtio_pci_host_notifier_read)
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@xxxxxxxxxxxxxxxxxx>
> ---
>  hw/virtio-pci.c |  152 ++++++++++++++++++++++++++++++++++++++++++++++--------
>  hw/virtio.c     |   14 ++++-
>  hw/virtio.h     |   13 +++++
>  3 files changed, 153 insertions(+), 26 deletions(-)
> 
> Now toggles host notifiers based on VIRTIO_CONFIG_S_DRIVER_OK status changes.
> The cleanest way I could see was to introduce pre and post set_status()
> callbacks.  They allow a binding to hook status changes, including the status
> change from virtio_reset().
> 
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index 549118d..117e855 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -83,6 +83,11 @@
>  /* Flags track per-device state like workarounds for quirks in older guests. */
>  #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG  (1 << 0)
>  
> +/* Performance improves when virtqueue kick processing is decoupled from the
> + * vcpu thread using ioeventfd for some devices. */
> +#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1
> +#define VIRTIO_PCI_FLAG_USE_IOEVENTFD   (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
> +
>  /* QEMU doesn't strictly need write barriers since everything runs in
>   * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
>   * KVM or if kqemu gets SMP support.
> @@ -179,12 +184,125 @@ static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
>      return 0;
>  }
>  
> +static int virtio_pci_set_host_notifier_ioeventfd(VirtIOPCIProxy *proxy,
> +                                                  int n, bool assign)
> +{
> +    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
> +    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
> +    int r;
> +    if (assign) {
> +        r = event_notifier_init(notifier, 1);
> +        if (r < 0) {
> +            return r;
> +        }
> +        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
> +                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
> +                                       n, assign);
> +        if (r < 0) {
> +            event_notifier_cleanup(notifier);
> +        }
> +    } else {
> +        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
> +                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
> +                                       n, assign);
> +        if (r < 0) {
> +            return r;
> +        }
> +        event_notifier_cleanup(notifier);
> +    }
> +    return r;
> +}
> +
> +static void virtio_pci_host_notifier_read(void *opaque)
> +{
> +    VirtQueue *vq = opaque;
> +    EventNotifier *n = virtio_queue_get_host_notifier(vq);
> +    if (event_notifier_test_and_clear(n)) {
> +        virtio_queue_notify_vq(vq);
> +    }
> +}
> +
> +static void virtio_pci_set_host_notifier_fd_handler(VirtIOPCIProxy *proxy,
> +                                                    int n, bool assign)
> +{
> +    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
> +    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
> +    if (assign) {
> +        qemu_set_fd_handler(event_notifier_get_fd(notifier),
> +                            virtio_pci_host_notifier_read, NULL, vq);
> +    } else {
> +        qemu_set_fd_handler(event_notifier_get_fd(notifier),
> +                            NULL, NULL, NULL);
> +    }
> +}
> +
> +static int virtio_pci_set_host_notifiers(VirtIOPCIProxy *proxy, bool assign)
> +{
> +    int n, r;
> +
> +    for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
> +        if (!virtio_queue_get_num(proxy->vdev, n)) {
> +            continue;
> +        }
> +
> +        if (assign) {
> +            r = virtio_pci_set_host_notifier_ioeventfd(proxy, n, true);
> +            if (r < 0) {
> +                goto assign_error;
> +            }
> +
> +            virtio_pci_set_host_notifier_fd_handler(proxy, n, true);
> +        } else {
> +            virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
> +            virtio_pci_set_host_notifier_ioeventfd(proxy, n, false);
> +        }
> +    }
> +    return 0;
> +
> +assign_error:
> +    while (--n >= 0) {
> +        virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
> +        virtio_pci_set_host_notifier_ioeventfd(proxy, n, false);
> +    }
> +    return r;
> +}
> +
> +static void virtio_pci_pre_set_status(void *opaque, uint8_t oldval,
> +                                      uint8_t newval)
> +{
> +    VirtIOPCIProxy *proxy = opaque;
> +    bool changed = (oldval ^ newval) & VIRTIO_CONFIG_S_DRIVER_OK;
> +    bool driver_ok = newval & VIRTIO_CONFIG_S_DRIVER_OK;
> +
> +    /* Bring up host notifiers before virtio device may request them */
> +    if (changed && driver_ok &&
> +        (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD)) {
> +        if (virtio_pci_set_host_notifiers(proxy, true) < 0) {
> +            proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
> +        }
> +    }
> +}
> +
> +static void virtio_pci_post_set_status(void *opaque, uint8_t oldval,
> +                                       uint8_t newval)
> +{
> +    VirtIOPCIProxy *proxy = opaque;
> +    bool changed = (oldval ^ newval) & VIRTIO_CONFIG_S_DRIVER_OK;
> +    bool driver_ok = newval & VIRTIO_CONFIG_S_DRIVER_OK;
> +
> +    /* Take down host notifiers after virtio device releases them */
> +    if (changed && !driver_ok &&
> +        (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD)) {
> +        virtio_pci_set_host_notifiers(proxy, false);
> +    }
> +}
> +

This old/new value in the API looks hacky ... how about either
1. only invoking the callbacks on a real change
or
2. using another flag to save state, making virtio_pci_set_host_notifiers
   idempotent (rough sketch below)
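
Something like the following for option 2 (untested; the flag name and bit
are made up here):

    /* Sketch: track notifier state in its own flag so redundant
     * transitions become no-ops and the callback doesn't need to
     * compare old/new status values.
     */
    #define VIRTIO_PCI_FLAG_HOST_NOTIFIERS_UP (1 << 2)

    static int virtio_pci_set_host_notifiers(VirtIOPCIProxy *proxy, bool assign)
    {
        bool up = proxy->flags & VIRTIO_PCI_FLAG_HOST_NOTIFIERS_UP;

        if (assign == up) {
            return 0;               /* already in the requested state */
        }
        if (assign) {
            proxy->flags |= VIRTIO_PCI_FLAG_HOST_NOTIFIERS_UP;
        } else {
            proxy->flags &= ~VIRTIO_PCI_FLAG_HOST_NOTIFIERS_UP;
        }
        /* ... per-queue assign/deassign loop exactly as in this patch ... */
        return 0;
    }

A single status hook could then call this unconditionally with the current
DRIVER_OK state and stop caring about the old value.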

>  static void virtio_pci_reset(DeviceState *d)
>  {
>      VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev);
>      virtio_reset(proxy->vdev);
>      msix_reset(&proxy->pci_dev);
> -    proxy->flags = 0;
> +    proxy->flags &= ~VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
>  }
>  
>  static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
> @@ -480,30 +598,12 @@ assign_error:
>  static int virtio_pci_set_host_notifier(void *opaque, int n, bool assign)

Might be easier to open-code this.
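
E.g. something like this, if flattening is what you have in mind (untested):

    static int virtio_pci_set_host_notifier(void *opaque, int n, bool assign)
    {
        VirtIOPCIProxy *proxy = opaque;

        /* With ioeventfd in use the fd stays bound to KVM; vhost only
         * takes over (or hands back) reading it, so just toggle the
         * qemu fd handler.
         */
        if (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) {
            virtio_pci_set_host_notifier_fd_handler(proxy, n, !assign);
            return 0;
        }
        return virtio_pci_set_host_notifier_ioeventfd(proxy, n, assign);
    }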

>  {
>      VirtIOPCIProxy *proxy = opaque;
> -    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
> -    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
> -    int r;
> -    if (assign) {
> -        r = event_notifier_init(notifier, 1);
> -        if (r < 0) {
> -            return r;
> -        }
> -        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
> -                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
> -                                       n, assign);
> -        if (r < 0) {
> -            event_notifier_cleanup(notifier);
> -        }
> +    if (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) {

Is this flag tested both here and by the callers?

> +        virtio_pci_set_host_notifier_fd_handler(proxy, n, !assign);
> +        return 0;
>      } else {
> -        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
> -                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
> -                                       n, assign);
> -        if (r < 0) {
> -            return r;
> -        }
> -        event_notifier_cleanup(notifier);
> +        return virtio_pci_set_host_notifier_ioeventfd(proxy, n, assign);
>      }
> -    return r;
>  }
>  
>  static const VirtIOBindings virtio_pci_bindings = {
> @@ -515,6 +615,8 @@ static const VirtIOBindings virtio_pci_bindings = {
>      .get_features = virtio_pci_get_features,
>      .set_host_notifier = virtio_pci_set_host_notifier,
>      .set_guest_notifiers = virtio_pci_set_guest_notifiers,
> +    .pre_set_status = virtio_pci_pre_set_status,
> +    .post_set_status = virtio_pci_post_set_status,
>  };
>  
>  static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
> @@ -702,6 +804,8 @@ static PCIDeviceInfo virtio_info[] = {
>          .qdev.props = (Property[]) {
>              DEFINE_PROP_HEX32("class", VirtIOPCIProxy, class_code, 0),
>              DEFINE_BLOCK_PROPERTIES(VirtIOPCIProxy, block),
> +            DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
> +                            VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
>              DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
>              DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
>              DEFINE_PROP_END_OF_LIST(),
> @@ -714,6 +818,8 @@ static PCIDeviceInfo virtio_info[] = {
>          .exit       = virtio_net_exit_pci,
>          .romfile    = "pxe-virtio.bin",
>          .qdev.props = (Property[]) {
> +            DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
> +                            VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
>              DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
>              DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
>              DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
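
Usage note: since "ioeventfd" is a qdev bit property, it should be
switchable per device with the usual -device syntax, e.g. to get the old
synchronous behavior back for a single nic (netdev=net0 is a placeholder
for an existing backend):

    qemu -device virtio-net-pci,netdev=net0,ioeventfd=off ...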
> diff --git a/hw/virtio.c b/hw/virtio.c
> index a2a657e..1f83b2c 100644
> --- a/hw/virtio.c
> +++ b/hw/virtio.c
> @@ -574,11 +574,19 @@ int virtio_queue_get_num(VirtIODevice *vdev, int n)
>      return vdev->vq[n].vring.num;
>  }
>  
> +void virtio_queue_notify_vq(VirtQueue *vq)
> +{
> +    if (vq->vring.desc) {
> +        VirtIODevice *vdev = vq->vdev;
> +        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> +        vq->handle_output(vdev, vq);
> +    }
> +}
> +
>  void virtio_queue_notify(VirtIODevice *vdev, int n)
>  {
> -    if (n < VIRTIO_PCI_QUEUE_MAX && vdev->vq[n].vring.desc) {
> -        trace_virtio_queue_notify(vdev, n, &vdev->vq[n]);
> -        vdev->vq[n].handle_output(vdev, &vdev->vq[n]);
> +    if (n < VIRTIO_PCI_QUEUE_MAX) {
> +        virtio_queue_notify_vq(&vdev->vq[n]);
>      }
>  }
>  
> diff --git a/hw/virtio.h b/hw/virtio.h
> index 02fa312..5767914 100644
> --- a/hw/virtio.h
> +++ b/hw/virtio.h
> @@ -95,6 +95,8 @@ typedef struct {
>      unsigned (*get_features)(void * opaque);
>      int (*set_guest_notifiers)(void * opaque, bool assigned);
>      int (*set_host_notifier)(void * opaque, int n, bool assigned);
> +    void (*pre_set_status)(void * opaque, uint8_t oldval, uint8_t newval);
> +    void (*post_set_status)(void * opaque, uint8_t oldval, uint8_t newval);
>  } VirtIOBindings;
>  
>  #define VIRTIO_PCI_QUEUE_MAX 64
> @@ -127,10 +129,20 @@ struct VirtIODevice
>  
>  static inline void virtio_set_status(VirtIODevice *vdev, uint8_t val)
>  {
> +    uint8_t old = vdev->status;
> +
> +    if (vdev->binding->pre_set_status) {
> +        vdev->binding->pre_set_status(vdev->binding_opaque, old, val);
> +    }
> +
>      if (vdev->set_status) {
>          vdev->set_status(vdev, val);
>      }
>      vdev->status = val;
> +
> +    if (vdev->binding->post_set_status) {
> +        vdev->binding->post_set_status(vdev->binding_opaque, old, val);
> +    }
>  }
>  
>  VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
> @@ -219,5 +231,6 @@ void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx);
>  VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
>  EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
>  EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
> +void virtio_queue_notify_vq(VirtQueue *vq);
>  void virtio_irq(VirtQueue *vq);
>  #endif
> -- 
> 1.7.2.3

