On Tue, 2011-02-01 at 00:02 +0200, Michael S. Tsirkin wrote: > On Mon, Jan 31, 2011 at 02:47:34PM -0700, Alex Williamson wrote: > > On Mon, 2011-01-31 at 23:19 +0200, Michael S. Tsirkin wrote: > > > When MSI is off, each interrupt needs to be bounced through the io > > > thread when it's set/cleared, so vhost-net causes more context switches and > > > higher CPU utilization than userspace virtio which handles networking in > > > the same thread. > > > > > > We'll need to fix this by adding level irq support in kvm irqfd, > > > for now disable vhost-net in these configurations. > > > > > > Added a vhostforce flag to force vhost-net back on. > > > > > > Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx> > > > --- > > > > > > Untested, I'll send a pull request later after > > > some testing assuming we've resolved the > > > command line comments to everyone's satisfaction. > > > > > > Changes since v1: > > > added vhostforce to enable vhost for non-MSI guests > > > > I'm still not thrilled with the errno hack. If this is really a generic > > problem, we should better architect the set_guest_notifiers() callback, > > the force option feels like a band-aide. > > I'm still curious if we can't > > be more dynamic about how we setup the set_guest_notifiers() function > > pointer so it only gets set if (force || msix_enabled()) and we can > > limit some of the impact here to vhost. Thanks, > > > > Alex > > I'm not entirely happy with the API either, but I'm sure > playing with the function pointer is not the solution > we are looking for: > > msix is enabled/disabled dynamically. force is static but it is > something net backend knows about, virtio pci doesn't. > So I suspect modifying the callback on the fly will become > messy quite quickly. Isn't there a callback to the device when msix is enabled/disabled? > Well, this API will be easy to tweak after the fact, there's a single > user after all. Gosh, I wish I got to take that attitude when I submit code... ;) Alex > > > hw/vhost.c | 16 +++++++++++----- > > > hw/vhost.h | 3 ++- > > > hw/vhost_net.c | 8 +++++--- > > > hw/vhost_net.h | 2 +- > > > hw/virtio-net.c | 6 ++++-- > > > hw/virtio-pci.c | 6 +++++- > > > hw/virtio.h | 4 +++- > > > net/tap.c | 6 ++++-- > > > qemu-options.hx | 4 +++- > > > 9 files changed, 38 insertions(+), 17 deletions(-) > > > > > > diff --git a/hw/vhost.c b/hw/vhost.c > > > index 6082da2..72df75f 100644 > > > --- a/hw/vhost.c > > > +++ b/hw/vhost.c > > > @@ -581,7 +581,7 @@ static void vhost_virtqueue_cleanup(struct vhost_dev *dev, > > > 0, virtio_queue_get_desc_size(vdev, idx)); > > > } > > > > > > -int vhost_dev_init(struct vhost_dev *hdev, int devfd) > > > +int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force) > > > { > > > uint64_t features; > > > int r; > > > @@ -613,6 +613,7 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd) > > > hdev->log_enabled = false; > > > hdev->started = false; > > > cpu_register_phys_memory_client(&hdev->client); > > > + hdev->force = force; > > > return 0; > > > fail: > > > r = -errno; > > > @@ -636,9 +637,12 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) > > > goto fail; > > > } > > > > > > - r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true); > > > + r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true, > > > + hdev->force); > > > if (r < 0) { > > > - fprintf(stderr, "Error binding guest notifier: %d\n", -r); > > > + if (r != -EVIRTIO_DISABLED) { > > > + fprintf(stderr, "Error binding guest notifier: %d\n", -r); > > > + } > > > goto fail_notifiers; > > > } > > > > > > @@ -686,7 +690,8 @@ fail_vq: > > > } > > > fail_mem: > > > fail_features: > > > - vdev->binding->set_guest_notifiers(vdev->binding_opaque, false); > > > + vdev->binding->set_guest_notifiers(vdev->binding_opaque, false, > > > + hdev->force); > > > fail_notifiers: > > > fail: > > > return r; > > > @@ -704,7 +709,8 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) > > > } > > > vhost_client_sync_dirty_bitmap(&hdev->client, 0, > > > (target_phys_addr_t)~0x0ull); > > > - r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false); > > > + r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false, > > > + hdev->force); > > > if (r < 0) { > > > fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r); > > > fflush(stderr); > > > diff --git a/hw/vhost.h b/hw/vhost.h > > > index 86dd834..d5d4d86 100644 > > > --- a/hw/vhost.h > > > +++ b/hw/vhost.h > > > @@ -38,9 +38,10 @@ struct vhost_dev { > > > bool log_enabled; > > > vhost_log_chunk_t *log; > > > unsigned long long log_size; > > > + bool force; > > > }; > > > > > > -int vhost_dev_init(struct vhost_dev *hdev, int devfd); > > > +int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force); > > > void vhost_dev_cleanup(struct vhost_dev *hdev); > > > int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); > > > void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev); > > > diff --git a/hw/vhost_net.c b/hw/vhost_net.c > > > index c068be1..4f57271 100644 > > > --- a/hw/vhost_net.c > > > +++ b/hw/vhost_net.c > > > @@ -81,7 +81,8 @@ static int vhost_net_get_fd(VLANClientState *backend) > > > } > > > } > > > > > > -struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd) > > > +struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd, > > > + bool force) > > > { > > > int r; > > > struct vhost_net *net = qemu_malloc(sizeof *net); > > > @@ -98,7 +99,7 @@ struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd) > > > (1 << VHOST_NET_F_VIRTIO_NET_HDR); > > > net->backend = r; > > > > > > - r = vhost_dev_init(&net->dev, devfd); > > > + r = vhost_dev_init(&net->dev, devfd, force); > > > if (r < 0) { > > > goto fail; > > > } > > > @@ -188,7 +189,8 @@ void vhost_net_cleanup(struct vhost_net *net) > > > qemu_free(net); > > > } > > > #else > > > -struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd) > > > +struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd, > > > + bool force) > > > { > > > return NULL; > > > } > > > diff --git a/hw/vhost_net.h b/hw/vhost_net.h > > > index 6c18ff7..8435094 100644 > > > --- a/hw/vhost_net.h > > > +++ b/hw/vhost_net.h > > > @@ -6,7 +6,7 @@ > > > struct vhost_net; > > > typedef struct vhost_net VHostNetState; > > > > > > -VHostNetState *vhost_net_init(VLANClientState *backend, int devfd); > > > +VHostNetState *vhost_net_init(VLANClientState *backend, int devfd, bool force); > > > > > > int vhost_net_start(VHostNetState *net, VirtIODevice *dev); > > > void vhost_net_stop(VHostNetState *net, VirtIODevice *dev); > > > diff --git a/hw/virtio-net.c b/hw/virtio-net.c > > > index ccb3e63..5de3fee 100644 > > > --- a/hw/virtio-net.c > > > +++ b/hw/virtio-net.c > > > @@ -121,8 +121,10 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) > > > if (!n->vhost_started) { > > > int r = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer), &n->vdev); > > > if (r < 0) { > > > - error_report("unable to start vhost net: %d: " > > > - "falling back on userspace virtio", -r); > > > + if (r != -EVIRTIO_DISABLED) { > > > + error_report("unable to start vhost net: %d: " > > > + "falling back on userspace virtio", -r); > > > + } > > > } else { > > > n->vhost_started = 1; > > > } > > > diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c > > > index d07ff97..c93ef11 100644 > > > --- a/hw/virtio-pci.c > > > +++ b/hw/virtio-pci.c > > > @@ -595,12 +595,16 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign) > > > return 0; > > > } > > > > > > -static int virtio_pci_set_guest_notifiers(void *opaque, bool assign) > > > +static int virtio_pci_set_guest_notifiers(void *opaque, bool assign, bool force) > > > { > > > VirtIOPCIProxy *proxy = opaque; > > > VirtIODevice *vdev = proxy->vdev; > > > int r, n; > > > > > > + if (assign && !force && !msix_enabled(&proxy->pci_dev)) { > > > + return -EVIRTIO_DISABLED; > > > + } > > > + > > > for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) { > > > if (!virtio_queue_get_num(vdev, n)) { > > > break; > > > diff --git a/hw/virtio.h b/hw/virtio.h > > > index d8546d5..156c9a3 100644 > > > --- a/hw/virtio.h > > > +++ b/hw/virtio.h > > > @@ -93,11 +93,13 @@ typedef struct { > > > int (*load_config)(void * opaque, QEMUFile *f); > > > int (*load_queue)(void * opaque, int n, QEMUFile *f); > > > unsigned (*get_features)(void * opaque); > > > - int (*set_guest_notifiers)(void * opaque, bool assigned); > > > + int (*set_guest_notifiers)(void * opaque, bool assigned, bool force); > > > int (*set_host_notifier)(void * opaque, int n, bool assigned); > > > void (*vmstate_change)(void * opaque, bool running); > > > } VirtIOBindings; > > > > > > +#define EVIRTIO_DISABLED ERANGE > > > + > > > #define VIRTIO_PCI_QUEUE_MAX 64 > > > > > > #define VIRTIO_NO_VECTOR 0xffff > > > diff --git a/net/tap.c b/net/tap.c > > > index eada34a..b8cd252 100644 > > > --- a/net/tap.c > > > +++ b/net/tap.c > > > @@ -491,8 +491,10 @@ int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan > > > } > > > } > > > > > > - if (qemu_opt_get_bool(opts, "vhost", !!qemu_opt_get(opts, "vhostfd"))) { > > > + if (qemu_opt_get_bool(opts, "vhost", !!qemu_opt_get(opts, "vhostfd") || > > > + qemu_opt_get_bool(opts, "vhostforce", false))) { > > > int vhostfd, r; > > > + bool force = qemu_opt_get_bool(opts, "vhostforce", false); > > > if (qemu_opt_get(opts, "vhostfd")) { > > > r = net_handle_fd_param(mon, qemu_opt_get(opts, "vhostfd")); > > > if (r == -1) { > > > @@ -502,7 +504,7 @@ int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan > > > } else { > > > vhostfd = -1; > > > } > > > - s->vhost_net = vhost_net_init(&s->nc, vhostfd); > > > + s->vhost_net = vhost_net_init(&s->nc, vhostfd, force); > > > if (!s->vhost_net) { > > > error_report("vhost-net requested but could not be initialized"); > > > return -1; > > > diff --git a/qemu-options.hx b/qemu-options.hx > > > index 898561d..11c93a2 100644 > > > --- a/qemu-options.hx > > > +++ b/qemu-options.hx > > > @@ -1050,7 +1050,7 @@ DEF("net", HAS_ARG, QEMU_OPTION_net, > > > "-net tap[,vlan=n][,name=str],ifname=name\n" > > > " connect the host TAP network interface to VLAN 'n'\n" > > > #else > > > - "-net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h]\n" > > > + "-net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h][,vhostforce=on|off]\n" > > > " connect the host TAP network interface to VLAN 'n' and use the\n" > > > " network scripts 'file' (default=" DEFAULT_NETWORK_SCRIPT ")\n" > > > " and 'dfile' (default=" DEFAULT_NETWORK_DOWN_SCRIPT ")\n" > > > @@ -1061,6 +1061,8 @@ DEF("net", HAS_ARG, QEMU_OPTION_net, > > > " use vnet_hdr=off to avoid enabling the IFF_VNET_HDR tap flag\n" > > > " use vnet_hdr=on to make the lack of IFF_VNET_HDR support an error condition\n" > > > " use vhost=on to enable experimental in kernel accelerator\n" > > > + " (only has effect for virtio guests which use MSIX)\n" > > > + " use vhostforce=on to force vhost on for non-MSIX virtio guests\n" > > > " use 'vhostfd=h' to connect to an already opened vhost net device\n" > > > #endif > > > "-net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port]\n" > > > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html