Re: [RFC V2 PATCH 4/4] virtio-net: add multiqueue support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 07/01/2012 05:43 PM, Michael S. Tsirkin wrote:
On Mon, Jun 25, 2012 at 06:04:49PM +0800, Jason Wang wrote:
This patch lets virtio-net transmit and receive packets through multiple
VLANClientStates and abstracts them as multiple virtqueues to the guest. A new
parameter 'queues' is introduced to specify the number of queue pairs.

The main goal for vhost support is to let multiqueue be used without
changes to the vhost code. So each vhost_net structure is still used to track a
single VLANClientState and two virtqueues, as in the past. As multiple
VLANClientStates are stored in the NICState, we can easily infer the
corresponding VLANClientState from it and the queue_index.

Signed-off-by: Jason Wang<jasowang@xxxxxxxxxx>
Can this patch be split up?
1. extend vhost API to allow multiqueue and minimally tweak virtio
2. add real multiqueue for virtio

Hmm?

Sure, do you think it's necessary to separate the vhost parts of multiqueue from virtio?
---
  hw/vhost.c      |   58 ++++---
  hw/vhost.h      |    1
  hw/vhost_net.c  |    7 +
  hw/vhost_net.h  |    2
  hw/virtio-net.c |  461 +++++++++++++++++++++++++++++++++++++------------------
  hw/virtio-net.h |    3
  6 files changed, 355 insertions(+), 177 deletions(-)

diff --git a/hw/vhost.c b/hw/vhost.c
index 43664e7..6318bb2 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -620,11 +620,12 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
  {
      target_phys_addr_t s, l, a;
      int r;
+    int vhost_vq_index = (idx>  2 ? idx - 1 : idx) % dev->nvqs;
      struct vhost_vring_file file = {
-        .index = idx,
+	.index = vhost_vq_index
      };
      struct vhost_vring_state state = {
-        .index = idx,
+        .index = vhost_vq_index
      };
      struct VirtQueue *vvq = virtio_get_queue(vdev, idx);

@@ -670,11 +671,12 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
          goto fail_alloc_ring;
      }

-    r = vhost_virtqueue_set_addr(dev, vq, idx, dev->log_enabled);
+    r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
      if (r<  0) {
          r = -errno;
          goto fail_alloc;
      }
+
      file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
      r = ioctl(dev->control, VHOST_SET_VRING_KICK,&file);
      if (r) {
@@ -715,7 +717,7 @@ static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
                                      unsigned idx)
  {
      struct vhost_vring_state state = {
-        .index = idx,
+        .index = (idx>  2 ? idx - 1 : idx) % dev->nvqs,
      };
      int r;
      r = ioctl(dev->control, VHOST_GET_VRING_BASE,&state);
@@ -829,7 +831,9 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
      }

      for (i = 0; i<  hdev->nvqs; ++i) {
-        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, true);
+        r = vdev->binding->set_host_notifier(vdev->binding_opaque,
+					     hdev->start_idx + i,
+					     true);
          if (r<  0) {
              fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r);
              goto fail_vq;
@@ -839,7 +843,9 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
      return 0;
  fail_vq:
      while (--i>= 0) {
-        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+        r = vdev->binding->set_host_notifier(vdev->binding_opaque,
+					     hdev->start_idx + i,
+					     false);
          if (r<  0) {
              fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -r);
              fflush(stderr);
@@ -860,7 +866,9 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
      int i, r;

      for (i = 0; i<  hdev->nvqs; ++i) {
-        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+        r = vdev->binding->set_host_notifier(vdev->binding_opaque,
+					     hdev->start_idx + i,
+					     false);
          if (r<  0) {
              fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r);
              fflush(stderr);
@@ -874,15 +882,17 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
  {
      int i, r;
      if (!vdev->binding->set_guest_notifiers) {
-        fprintf(stderr, "binding does not support guest notifiers\n");
+        fprintf(stderr, "binding does not support guest notifier\n");
          r = -ENOSYS;
          goto fail;
      }

-    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
-    if (r<  0) {
-        fprintf(stderr, "Error binding guest notifier: %d\n", -r);
-        goto fail_notifiers;
+    if (hdev->start_idx == 0) {
+        r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
+        if (r<  0) {
+            fprintf(stderr, "Error binding guest notifier: %d\n", -r);
+            goto fail_notifiers;
+        }
      }

      r = vhost_dev_set_features(hdev, hdev->log_enabled);
@@ -898,7 +908,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
          r = vhost_virtqueue_init(hdev,
                                   vdev,
                                   hdev->vqs + i,
-                                 i);
+                                 hdev->start_idx + i);
          if (r<  0) {
              goto fail_vq;
          }
@@ -925,11 +935,13 @@ fail_vq:
          vhost_virtqueue_cleanup(hdev,
                                  vdev,
                                  hdev->vqs + i,
-                                i);
+                                hdev->start_idx + i);
      }
+    i = hdev->nvqs;
  fail_mem:
  fail_features:
-    vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
+    if (hdev->start_idx == 0)
+        vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
  fail_notifiers:
  fail:
      return r;
@@ -944,18 +956,22 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
          vhost_virtqueue_cleanup(hdev,
                                  vdev,
                                  hdev->vqs + i,
-                                i);
+                                hdev->start_idx + i);
      }
+
      for (i = 0; i<  hdev->n_mem_sections; ++i) {
          vhost_sync_dirty_bitmap(hdev,&hdev->mem_sections[i],
                                  0, (target_phys_addr_t)~0x0ull);
      }
-    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
-    if (r<  0) {
-        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
-        fflush(stderr);
+
+    if (hdev->start_idx == 0) {
+	r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
+	if (r<  0) {
+	    fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
+	    fflush(stderr);
+	}
+	assert (r>= 0);
      }
-    assert (r>= 0);

      hdev->started = false;
      g_free(hdev->log);
diff --git a/hw/vhost.h b/hw/vhost.h
index 80e64df..fa5357a 100644
--- a/hw/vhost.h
+++ b/hw/vhost.h
@@ -34,6 +34,7 @@ struct vhost_dev {
      MemoryRegionSection *mem_sections;
      struct vhost_virtqueue *vqs;
      int nvqs;
+    int start_idx;
      unsigned long long features;
      unsigned long long acked_features;
      unsigned long long backend_features;
diff --git a/hw/vhost_net.c b/hw/vhost_net.c
index f672e9d..73a72bb 100644
--- a/hw/vhost_net.c
+++ b/hw/vhost_net.c
@@ -138,13 +138,15 @@ bool vhost_net_query(VHostNetState *net, VirtIODevice *dev)
  }

  int vhost_net_start(struct vhost_net *net,
-                    VirtIODevice *dev)
+                    VirtIODevice *dev,
+                    int start_idx)
  {
      struct vhost_vring_file file = { };
      int r;

      net->dev.nvqs = 2;
      net->dev.vqs = net->vqs;
+    net->dev.start_idx = start_idx;

      r = vhost_dev_enable_notifiers(&net->dev, dev);
      if (r<  0) {
@@ -227,7 +229,8 @@ bool vhost_net_query(VHostNetState *net, VirtIODevice *dev)
  }

  int vhost_net_start(struct vhost_net *net,
-		    VirtIODevice *dev)
+                    VirtIODevice *dev,
+                    int start_idx)
  {
      return -ENOSYS;
  }
diff --git a/hw/vhost_net.h b/hw/vhost_net.h
index 91e40b1..79a4f09 100644
--- a/hw/vhost_net.h
+++ b/hw/vhost_net.h
@@ -9,7 +9,7 @@ typedef struct vhost_net VHostNetState;
  VHostNetState *vhost_net_init(VLANClientState *backend, int devfd, bool force);

  bool vhost_net_query(VHostNetState *net, VirtIODevice *dev);
-int vhost_net_start(VHostNetState *net, VirtIODevice *dev);
+int vhost_net_start(VHostNetState *net, VirtIODevice *dev, int start_idx);
  void vhost_net_stop(VHostNetState *net, VirtIODevice *dev);

  void vhost_net_cleanup(VHostNetState *net);
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 3f190d4..d42c4cc 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -26,34 +26,43 @@
  #define MAC_TABLE_ENTRIES    64
  #define MAX_VLAN    (1<<  12)   /* Per 802.1Q definition */

-typedef struct VirtIONet
+struct VirtIONet;
+
+typedef struct VirtIONetQueue
  {
-    VirtIODevice vdev;
-    uint8_t mac[ETH_ALEN];
-    uint16_t status;
      VirtQueue *rx_vq;
      VirtQueue *tx_vq;
-    VirtQueue *ctrl_vq;
-    NICState *nic;
      QEMUTimer *tx_timer;
      QEMUBH *tx_bh;
      uint32_t tx_timeout;
-    int32_t tx_burst;
      int tx_waiting;
-    uint32_t has_vnet_hdr;
-    uint8_t has_ufo;
      struct {
          VirtQueueElement elem;
          ssize_t len;
      } async_tx;
+    struct VirtIONet *n;
+    uint8_t vhost_started;
+} VirtIONetQueue;
+
+typedef struct VirtIONet
+{
+    VirtIODevice vdev;
+    uint8_t mac[ETH_ALEN];
+    uint16_t status;
+    VirtIONetQueue vqs[MAX_QUEUE_NUM];
+    VirtQueue *ctrl_vq;
+    NICState *nic;
+    int32_t tx_burst;
+    uint32_t has_vnet_hdr;
+    uint8_t has_ufo;
      int mergeable_rx_bufs;
+    int multiqueue;
      uint8_t promisc;
      uint8_t allmulti;
      uint8_t alluni;
      uint8_t nomulti;
      uint8_t nouni;
      uint8_t nobcast;
-    uint8_t vhost_started;
      struct {
          int in_use;
          int first_multi;
@@ -63,6 +72,7 @@ typedef struct VirtIONet
      } mac_table;
      uint32_t *vlans;
      DeviceState *qdev;
+    uint32_t queues;
  } VirtIONet;

  /* TODO
@@ -74,12 +84,25 @@ static VirtIONet *to_virtio_net(VirtIODevice *vdev)
      return (VirtIONet *)vdev;
  }

+static int vq_get_pair_index(VirtIONet *n, VirtQueue *vq)
+{
+    int i;
+    for (i = 0; i<  n->queues; i++) {
+        if (n->vqs[i].tx_vq == vq || n->vqs[i].rx_vq == vq) {
+            return i;
+        }
+    }
+    assert(1);
+    return -1;
+}
+
  static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
  {
      VirtIONet *n = to_virtio_net(vdev);
      struct virtio_net_config netcfg;

      stw_p(&netcfg.status, n->status);
+    netcfg.queues = n->queues * 2;
      memcpy(netcfg.mac, n->mac, ETH_ALEN);
      memcpy(config,&netcfg, sizeof(netcfg));
  }
@@ -103,78 +126,140 @@ static bool virtio_net_started(VirtIONet *n, uint8_t status)
          (n->status&  VIRTIO_NET_S_LINK_UP)&&  n->vdev.vm_running;
  }

-static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
+static void nc_vhost_status(VLANClientState *nc, VirtIONet *n,
+                            uint8_t status)
  {
-    if (!n->nic->nc.peer) {
+    int queue_index = nc->queue_index;
+    VLANClientState *peer = nc->peer;
+    VirtIONetQueue *netq =&n->vqs[nc->queue_index];
+
+    if (!peer) {
          return;
      }
-    if (n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
+    if (peer->info->type != NET_CLIENT_TYPE_TAP) {
          return;
      }

-    if (!tap_get_vhost_net(n->nic->nc.peer)) {
+    if (!tap_get_vhost_net(peer)) {
          return;
      }
-    if (!!n->vhost_started == virtio_net_started(n, status)&&
-                              !n->nic->nc.peer->link_down) {
+    if (!!netq->vhost_started == virtio_net_started(n, status)&&
+                                 !peer->link_down) {
          return;
      }
-    if (!n->vhost_started) {
-        int r;
-        if (!vhost_net_query(tap_get_vhost_net(n->nic->nc.peer),&n->vdev)) {
+    if (!netq->vhost_started) {
+	/* skip ctrl vq */
+	int r, start_idx = queue_index == 0 ? 0 : queue_index * 2 + 1;
+        if (!vhost_net_query(tap_get_vhost_net(peer),&n->vdev)) {
              return;
          }
-        r = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer),&n->vdev);
+        r = vhost_net_start(tap_get_vhost_net(peer),&n->vdev, start_idx);
          if (r<  0) {
              error_report("unable to start vhost net: %d: "
                           "falling back on userspace virtio", -r);
          } else {
-            n->vhost_started = 1;
+            netq->vhost_started = 1;
          }
      } else {
-        vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer),&n->vdev);
-        n->vhost_started = 0;
+        vhost_net_stop(tap_get_vhost_net(peer),&n->vdev);
+        netq->vhost_started = 0;
+    }
+}
+
+static int peer_attach(VirtIONet *n, int index)
+{
+    if (!n->nic->ncs[index]->peer) {
+	return -1;
+    }
+
+    if (n->nic->ncs[index]->peer->info->type != NET_CLIENT_TYPE_TAP) {
+	return -1;
+    }
+
+    return tap_attach(n->nic->ncs[index]->peer);
+}
+
+static int peer_detach(VirtIONet *n, int index)
+{
+    if (!n->nic->ncs[index]->peer) {
+	return -1;
+    }
+
+    if (n->nic->ncs[index]->peer->info->type != NET_CLIENT_TYPE_TAP) {
+	return -1;
+    }
+
+    return tap_detach(n->nic->ncs[index]->peer);
+}
+
+static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
+{
+    int i;
+    for (i = 0; i<  n->queues; i++) {
+	if (!n->multiqueue&&  i != 0)
+	    status = 0;
+        nc_vhost_status(n->nic->ncs[i], n, status);
      }
  }

  static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
  {
      VirtIONet *n = to_virtio_net(vdev);
+    int i;

      virtio_net_vhost_status(n, status);

-    if (!n->tx_waiting) {
-        return;
-    }
+    for (i = 0; i<  n->queues; i++) {
+        VirtIONetQueue *netq =&n->vqs[i];
+        if (!netq->tx_waiting) {
+            continue;
+        }
+
+	if (!n->multiqueue&&  i != 0)
+	    status = 0;

-    if (virtio_net_started(n, status)&&  !n->vhost_started) {
-        if (n->tx_timer) {
-            qemu_mod_timer(n->tx_timer,
-                           qemu_get_clock_ns(vm_clock) + n->tx_timeout);
+        if (virtio_net_started(n, status)&&  !netq->vhost_started) {
+            if (netq->tx_timer) {
+                qemu_mod_timer(netq->tx_timer,
+                               qemu_get_clock_ns(vm_clock) + netq->tx_timeout);
+            } else {
+                qemu_bh_schedule(netq->tx_bh);
+            }
          } else {
-            qemu_bh_schedule(n->tx_bh);
+            if (netq->tx_timer) {
+                qemu_del_timer(netq->tx_timer);
+            } else {
+                qemu_bh_cancel(netq->tx_bh);
+            }
          }
-    } else {
-        if (n->tx_timer) {
-            qemu_del_timer(n->tx_timer);
-        } else {
-            qemu_bh_cancel(n->tx_bh);
+    }
+}
+
+static bool virtio_net_is_link_up(VirtIONet *n)
+{
+    int i;
+    for (i = 0; i<  n->queues; i++) {
+        if (n->nic->ncs[i]->link_down) {
+            return false;
          }
      }
+    return true;
  }

  static void virtio_net_set_link_status(VLANClientState *nc)
  {
-    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+    VirtIONet *n = ((NICState *)(nc->opaque))->opaque;
      uint16_t old_status = n->status;

-    if (nc->link_down)
+    if (virtio_net_is_link_up(n)) {
          n->status&= ~VIRTIO_NET_S_LINK_UP;
-    else
+    } else {
          n->status |= VIRTIO_NET_S_LINK_UP;
+    }

-    if (n->status != old_status)
+    if (n->status != old_status) {
          virtio_notify_config(&n->vdev);
+    }

      virtio_net_set_status(&n->vdev, n->vdev.status);
  }
@@ -202,13 +287,15 @@ static void virtio_net_reset(VirtIODevice *vdev)

  static int peer_has_vnet_hdr(VirtIONet *n)
  {
-    if (!n->nic->nc.peer)
+    if (!n->nic->ncs[0]->peer) {
          return 0;
+    }

-    if (n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP)
+    if (n->nic->ncs[0]->peer->info->type != NET_CLIENT_TYPE_TAP) {
          return 0;
+    }

-    n->has_vnet_hdr = tap_has_vnet_hdr(n->nic->nc.peer);
+    n->has_vnet_hdr = tap_has_vnet_hdr(n->nic->ncs[0]->peer);

      return n->has_vnet_hdr;
  }
@@ -218,7 +305,7 @@ static int peer_has_ufo(VirtIONet *n)
      if (!peer_has_vnet_hdr(n))
          return 0;

-    n->has_ufo = tap_has_ufo(n->nic->nc.peer);
+    n->has_ufo = tap_has_ufo(n->nic->ncs[0]->peer);

      return n->has_ufo;
  }
@@ -228,9 +315,13 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
      VirtIONet *n = to_virtio_net(vdev);

      features |= (1<<  VIRTIO_NET_F_MAC);
+    features |= (1<<  VIRTIO_NET_F_MULTIQUEUE);

      if (peer_has_vnet_hdr(n)) {
-        tap_using_vnet_hdr(n->nic->nc.peer, 1);
+        int i;
+        for (i = 0; i<  n->queues; i++) {
+            tap_using_vnet_hdr(n->nic->ncs[i]->peer, 1);
+        }
      } else {
          features&= ~(0x1<<  VIRTIO_NET_F_CSUM);
          features&= ~(0x1<<  VIRTIO_NET_F_HOST_TSO4);
@@ -248,14 +339,15 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
          features&= ~(0x1<<  VIRTIO_NET_F_HOST_UFO);
      }

-    if (!n->nic->nc.peer ||
-        n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
+    if (!n->nic->ncs[0]->peer ||
+        n->nic->ncs[0]->peer->info->type != NET_CLIENT_TYPE_TAP) {
          return features;
      }
-    if (!tap_get_vhost_net(n->nic->nc.peer)) {
+    if (!tap_get_vhost_net(n->nic->ncs[0]->peer)) {
          return features;
      }
-    return vhost_net_get_features(tap_get_vhost_net(n->nic->nc.peer), features);
+    return vhost_net_get_features(tap_get_vhost_net(n->nic->ncs[0]->peer),
+                                  features);
  }

  static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
@@ -276,25 +368,38 @@ static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
  static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
  {
      VirtIONet *n = to_virtio_net(vdev);
+    int i, r;

      n->mergeable_rx_bufs = !!(features&  (1<<  VIRTIO_NET_F_MRG_RXBUF));
+    n->multiqueue = !!(features&  (1<<  VIRTIO_NET_F_MULTIQUEUE));

-    if (n->has_vnet_hdr) {
-        tap_set_offload(n->nic->nc.peer,
-                        (features>>  VIRTIO_NET_F_GUEST_CSUM)&  1,
-                        (features>>  VIRTIO_NET_F_GUEST_TSO4)&  1,
-                        (features>>  VIRTIO_NET_F_GUEST_TSO6)&  1,
-                        (features>>  VIRTIO_NET_F_GUEST_ECN)&  1,
-                        (features>>  VIRTIO_NET_F_GUEST_UFO)&  1);
-    }
-    if (!n->nic->nc.peer ||
-        n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
-        return;
-    }
-    if (!tap_get_vhost_net(n->nic->nc.peer)) {
-        return;
+    for (i = 0; i<  n->queues; i++) {
+        if (!n->multiqueue&&  i != 0) {
+            r = peer_detach(n, i);
+            assert(r == 0);
+        } else {
+            r = peer_attach(n, i);
+            assert(r == 0);
+
+            if (n->has_vnet_hdr) {
+                tap_set_offload(n->nic->ncs[i]->peer,
+                                (features>>  VIRTIO_NET_F_GUEST_CSUM)&  1,
+                                (features>>  VIRTIO_NET_F_GUEST_TSO4)&  1,
+                                (features>>  VIRTIO_NET_F_GUEST_TSO6)&  1,
+                                (features>>  VIRTIO_NET_F_GUEST_ECN)&  1,
+                                (features>>  VIRTIO_NET_F_GUEST_UFO)&  1);
+            }
+            if (!n->nic->ncs[i]->peer ||
+                n->nic->ncs[i]->peer->info->type != NET_CLIENT_TYPE_TAP) {
+                continue;
+            }
+            if (!tap_get_vhost_net(n->nic->ncs[i]->peer)) {
+                continue;
+            }
+            vhost_net_ack_features(tap_get_vhost_net(n->nic->ncs[i]->peer),
+                                   features);
+        }
      }
-    vhost_net_ack_features(tap_get_vhost_net(n->nic->nc.peer), features);
  }

  static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
@@ -446,7 +551,7 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
  {
      VirtIONet *n = to_virtio_net(vdev);

-    qemu_flush_queued_packets(&n->nic->nc);
+    qemu_flush_queued_packets(n->nic->ncs[vq_get_pair_index(n, vq)]);

      /* We now have RX buffers, signal to the IO thread to break out of the
       * select to re-poll the tap file descriptor */
@@ -455,36 +560,37 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)

  static int virtio_net_can_receive(VLANClientState *nc)
  {
-    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+    int queue_index = nc->queue_index;
+    VirtIONet *n = ((NICState *)nc->opaque)->opaque;
+
      if (!n->vdev.vm_running) {
          return 0;
      }

-    if (!virtio_queue_ready(n->rx_vq) ||
+    if (!virtio_queue_ready(n->vqs[queue_index].rx_vq) ||
          !(n->vdev.status&  VIRTIO_CONFIG_S_DRIVER_OK))
          return 0;

      return 1;
  }

-static int virtio_net_has_buffers(VirtIONet *n, int bufsize)
+static int virtio_net_has_buffers(VirtIONet *n, int bufsize, VirtQueue *vq)
  {
-    if (virtio_queue_empty(n->rx_vq) ||
-        (n->mergeable_rx_bufs&&
-         !virtqueue_avail_bytes(n->rx_vq, bufsize, 0))) {
-        virtio_queue_set_notification(n->rx_vq, 1);
+    if (virtio_queue_empty(vq) || (n->mergeable_rx_bufs&&
+        !virtqueue_avail_bytes(vq, bufsize, 0))) {
+        virtio_queue_set_notification(vq, 1);

          /* To avoid a race condition where the guest has made some buffers
           * available after the above check but before notification was
           * enabled, check for available buffers again.
           */
-        if (virtio_queue_empty(n->rx_vq) ||
-            (n->mergeable_rx_bufs&&
-             !virtqueue_avail_bytes(n->rx_vq, bufsize, 0)))
+        if (virtio_queue_empty(vq) || (n->mergeable_rx_bufs&&
+            !virtqueue_avail_bytes(vq, bufsize, 0))) {
              return 0;
+        }
      }

-    virtio_queue_set_notification(n->rx_vq, 0);
+    virtio_queue_set_notification(vq, 0);
      return 1;
  }

@@ -595,12 +701,15 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)

  static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
  {
-    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+    int queue_index = nc->queue_index;
+    VirtIONet *n = ((NICState *)(nc->opaque))->opaque;
+    VirtQueue *vq = n->vqs[queue_index].rx_vq;
      struct virtio_net_hdr_mrg_rxbuf *mhdr = NULL;
      size_t guest_hdr_len, offset, i, host_hdr_len;

-    if (!virtio_net_can_receive(&n->nic->nc))
+    if (!virtio_net_can_receive(n->nic->ncs[queue_index])) {
          return -1;
+    }

      /* hdr_len refers to the header we supply to the guest */
      guest_hdr_len = n->mergeable_rx_bufs ?
@@ -608,7 +717,7 @@ static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_


      host_hdr_len = n->has_vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
-    if (!virtio_net_has_buffers(n, size + guest_hdr_len - host_hdr_len))
+    if (!virtio_net_has_buffers(n, size + guest_hdr_len - host_hdr_len, vq))
          return 0;

      if (!receive_filter(n, buf, size))
@@ -623,7 +732,7 @@ static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_

          total = 0;

-        if (virtqueue_pop(n->rx_vq,&elem) == 0) {
+        if (virtqueue_pop(vq,&elem) == 0) {
              if (i == 0)
                  return -1;
              error_report("virtio-net unexpected empty queue: "
@@ -675,47 +784,50 @@ static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_
          }

          /* signal other side */
-        virtqueue_fill(n->rx_vq,&elem, total, i++);
+        virtqueue_fill(vq,&elem, total, i++);
      }

      if (mhdr) {
          stw_p(&mhdr->num_buffers, i);
      }

-    virtqueue_flush(n->rx_vq, i);
-    virtio_notify(&n->vdev, n->rx_vq);
+    virtqueue_flush(vq, i);
+    virtio_notify(&n->vdev, vq);

      return size;
  }

-static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
+static int32_t virtio_net_flush_tx(VirtIONet *n, VirtIONetQueue *tvq);

  static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
  {
-    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+    VirtIONet *n = ((NICState *)nc->opaque)->opaque;
+    VirtIONetQueue *netq =&n->vqs[nc->queue_index];

-    virtqueue_push(n->tx_vq,&n->async_tx.elem, n->async_tx.len);
-    virtio_notify(&n->vdev, n->tx_vq);
+    virtqueue_push(netq->tx_vq,&netq->async_tx.elem, netq->async_tx.len);
+    virtio_notify(&n->vdev, netq->tx_vq);

-    n->async_tx.elem.out_num = n->async_tx.len = 0;
+    netq->async_tx.elem.out_num = netq->async_tx.len;

-    virtio_queue_set_notification(n->tx_vq, 1);
-    virtio_net_flush_tx(n, n->tx_vq);
+    virtio_queue_set_notification(netq->tx_vq, 1);
+    virtio_net_flush_tx(n, netq);
  }

  /* TX */
-static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
+static int32_t virtio_net_flush_tx(VirtIONet *n, VirtIONetQueue *netq)
  {
      VirtQueueElement elem;
      int32_t num_packets = 0;
+    VirtQueue *vq = netq->tx_vq;
+
      if (!(n->vdev.status&  VIRTIO_CONFIG_S_DRIVER_OK)) {
          return num_packets;
      }

      assert(n->vdev.vm_running);

-    if (n->async_tx.elem.out_num) {
-        virtio_queue_set_notification(n->tx_vq, 0);
+    if (netq->async_tx.elem.out_num) {
+        virtio_queue_set_notification(vq, 0);
          return num_packets;
      }

@@ -747,12 +859,12 @@ static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
              len += hdr_len;
          }

-        ret = qemu_sendv_packet_async(&n->nic->nc, out_sg, out_num,
-                                      virtio_net_tx_complete);
+        ret = qemu_sendv_packet_async(n->nic->ncs[vq_get_pair_index(n, vq)],
+                                      out_sg, out_num, virtio_net_tx_complete);
          if (ret == 0) {
-            virtio_queue_set_notification(n->tx_vq, 0);
-            n->async_tx.elem = elem;
-            n->async_tx.len  = len;
+            virtio_queue_set_notification(vq, 0);
+            netq->async_tx.elem = elem;
+            netq->async_tx.len  = len;
              return -EBUSY;
          }

@@ -771,22 +883,23 @@ static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
  static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
  {
      VirtIONet *n = to_virtio_net(vdev);
+    VirtIONetQueue *netq =&n->vqs[vq_get_pair_index(n, vq)];

      /* This happens when device was stopped but VCPU wasn't. */
      if (!n->vdev.vm_running) {
-        n->tx_waiting = 1;
+        netq->tx_waiting = 1;
          return;
      }

-    if (n->tx_waiting) {
+    if (netq->tx_waiting) {
          virtio_queue_set_notification(vq, 1);
-        qemu_del_timer(n->tx_timer);
-        n->tx_waiting = 0;
-        virtio_net_flush_tx(n, vq);
+        qemu_del_timer(netq->tx_timer);
+        netq->tx_waiting = 0;
+        virtio_net_flush_tx(n, netq);
      } else {
-        qemu_mod_timer(n->tx_timer,
-                       qemu_get_clock_ns(vm_clock) + n->tx_timeout);
-        n->tx_waiting = 1;
+        qemu_mod_timer(netq->tx_timer,
+                       qemu_get_clock_ns(vm_clock) + netq->tx_timeout);
+        netq->tx_waiting = 1;
          virtio_queue_set_notification(vq, 0);
      }
  }
@@ -794,48 +907,53 @@ static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
  static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
  {
      VirtIONet *n = to_virtio_net(vdev);
+    VirtIONetQueue *netq =&n->vqs[vq_get_pair_index(n, vq)];

-    if (unlikely(n->tx_waiting)) {
+    if (unlikely(netq->tx_waiting)) {
          return;
      }
-    n->tx_waiting = 1;
+    netq->tx_waiting = 1;
      /* This happens when device was stopped but VCPU wasn't. */
      if (!n->vdev.vm_running) {
          return;
      }
      virtio_queue_set_notification(vq, 0);
-    qemu_bh_schedule(n->tx_bh);
+    qemu_bh_schedule(netq->tx_bh);
  }

  static void virtio_net_tx_timer(void *opaque)
  {
-    VirtIONet *n = opaque;
+    VirtIONetQueue *netq = opaque;
+    VirtIONet *n = netq->n;
+
      assert(n->vdev.vm_running);

-    n->tx_waiting = 0;
+    netq->tx_waiting = 0;

      /* Just in case the driver is not ready on more */
      if (!(n->vdev.status&  VIRTIO_CONFIG_S_DRIVER_OK))
          return;

-    virtio_queue_set_notification(n->tx_vq, 1);
-    virtio_net_flush_tx(n, n->tx_vq);
+    virtio_queue_set_notification(netq->tx_vq, 1);
+    virtio_net_flush_tx(n, netq);
  }

  static void virtio_net_tx_bh(void *opaque)
  {
-    VirtIONet *n = opaque;
+    VirtIONetQueue *netq = opaque;
+    VirtQueue *vq = netq->tx_vq;
+    VirtIONet *n = netq->n;
      int32_t ret;

      assert(n->vdev.vm_running);

-    n->tx_waiting = 0;
+    netq->tx_waiting = 0;

      /* Just in case the driver is not ready on more */
      if (unlikely(!(n->vdev.status&  VIRTIO_CONFIG_S_DRIVER_OK)))
          return;

-    ret = virtio_net_flush_tx(n, n->tx_vq);
+    ret = virtio_net_flush_tx(n, netq);
      if (ret == -EBUSY) {
          return; /* Notification re-enable handled by tx_complete */
      }
@@ -843,33 +961,39 @@ static void virtio_net_tx_bh(void *opaque)
      /* If we flush a full burst of packets, assume there are
       * more coming and immediately reschedule */
      if (ret>= n->tx_burst) {
-        qemu_bh_schedule(n->tx_bh);
-        n->tx_waiting = 1;
+        qemu_bh_schedule(netq->tx_bh);
+        netq->tx_waiting = 1;
          return;
      }

      /* If less than a full burst, re-enable notification and flush
       * anything that may have come in while we weren't looking.  If
       * we find something, assume the guest is still active and reschedule */
-    virtio_queue_set_notification(n->tx_vq, 1);
-    if (virtio_net_flush_tx(n, n->tx_vq)>  0) {
-        virtio_queue_set_notification(n->tx_vq, 0);
-        qemu_bh_schedule(n->tx_bh);
-        n->tx_waiting = 1;
+    virtio_queue_set_notification(vq, 1);
+    if (virtio_net_flush_tx(n, netq)>  0) {
+        virtio_queue_set_notification(vq, 0);
+        qemu_bh_schedule(netq->tx_bh);
+        netq->tx_waiting = 1;
      }
  }

  static void virtio_net_save(QEMUFile *f, void *opaque)
  {
      VirtIONet *n = opaque;
+    int i;

      /* At this point, backend must be stopped, otherwise
       * it might keep writing to memory. */
-    assert(!n->vhost_started);
+    for (i = 0; i<  n->queues; i++) {
+        assert(!n->vqs[i].vhost_started);
+    }
      virtio_save(&n->vdev, f);

      qemu_put_buffer(f, n->mac, ETH_ALEN);
-    qemu_put_be32(f, n->tx_waiting);
+    qemu_put_be32(f, n->queues);
+    for (i = 0; i<  n->queues; i++) {
+        qemu_put_be32(f, n->vqs[i].tx_waiting);
+    }
      qemu_put_be32(f, n->mergeable_rx_bufs);
      qemu_put_be16(f, n->status);
      qemu_put_byte(f, n->promisc);
@@ -902,7 +1026,10 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
      }

      qemu_get_buffer(f, n->mac, ETH_ALEN);
-    n->tx_waiting = qemu_get_be32(f);
+    n->queues = qemu_get_be32(f);
+    for (i = 0; i<  n->queues; i++) {
+        n->vqs[i].tx_waiting = qemu_get_be32(f);
+    }
      n->mergeable_rx_bufs = qemu_get_be32(f);

      if (version_id>= 3)
@@ -930,7 +1057,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
              n->mac_table.in_use = 0;
          }
      }
-
+
      if (version_id>= 6)
          qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN>>  3);

@@ -941,13 +1068,16 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
          }

          if (n->has_vnet_hdr) {
-            tap_using_vnet_hdr(n->nic->nc.peer, 1);
-            tap_set_offload(n->nic->nc.peer,
-                    (n->vdev.guest_features>>  VIRTIO_NET_F_GUEST_CSUM)&  1,
-                    (n->vdev.guest_features>>  VIRTIO_NET_F_GUEST_TSO4)&  1,
-                    (n->vdev.guest_features>>  VIRTIO_NET_F_GUEST_TSO6)&  1,
-                    (n->vdev.guest_features>>  VIRTIO_NET_F_GUEST_ECN)&  1,
-                    (n->vdev.guest_features>>  VIRTIO_NET_F_GUEST_UFO)&  1);
+            for(i = 0; i<  n->queues; i++) {
+                tap_using_vnet_hdr(n->nic->ncs[i]->peer, 1);
+                tap_set_offload(n->nic->ncs[i]->peer,
+                        (n->vdev.guest_features>>  VIRTIO_NET_F_GUEST_CSUM)&  1,
+                        (n->vdev.guest_features>>  VIRTIO_NET_F_GUEST_TSO4)&  1,
+                        (n->vdev.guest_features>>  VIRTIO_NET_F_GUEST_TSO6)&  1,
+                        (n->vdev.guest_features>>  VIRTIO_NET_F_GUEST_ECN)&  1,
+                        (n->vdev.guest_features>>  VIRTIO_NET_F_GUEST_UFO)&
+                        1);
+           }
          }
      }

@@ -982,7 +1112,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)

  static void virtio_net_cleanup(VLANClientState *nc)
  {
-    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+    VirtIONet *n = ((NICState *)nc->opaque)->opaque;

      n->nic = NULL;
  }
@@ -1000,6 +1130,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
                                virtio_net_conf *net)
  {
      VirtIONet *n;
+    int i;

      n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
                                          sizeof(struct virtio_net_config),
@@ -1012,7 +1143,6 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
      n->vdev.bad_features = virtio_net_bad_features;
      n->vdev.reset = virtio_net_reset;
      n->vdev.set_status = virtio_net_set_status;
-    n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);

      if (net->tx&&  strcmp(net->tx, "timer")&&  strcmp(net->tx, "bh")) {
          error_report("virtio-net: "
@@ -1021,15 +1151,6 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
          error_report("Defaulting to \"bh\"");
      }

-    if (net->tx&&  !strcmp(net->tx, "timer")) {
-        n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_timer);
-        n->tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer, n);
-        n->tx_timeout = net->txtimer;
-    } else {
-        n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_bh);
-        n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
-    }
-    n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
      qemu_macaddr_default_if_unset(&conf->macaddr);
      memcpy(&n->mac[0],&conf->macaddr, sizeof(n->mac));
      n->status = VIRTIO_NET_S_LINK_UP;
@@ -1038,7 +1159,6 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,

      qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);

-    n->tx_waiting = 0;
      n->tx_burst = net->txburst;
      n->mergeable_rx_bufs = 0;
      n->promisc = 1; /* for compatibility */
@@ -1046,6 +1166,32 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
      n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

      n->vlans = g_malloc0(MAX_VLAN>>  3);
+    n->queues = conf->queues;
+
+    /* Allocate the rx/tx virtqueue pair for each queue */
+    for (i = 0; i<  n->queues; i++) {
+        n->vqs[i].rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
+        if (net->tx&&  !strcmp(net->tx, "timer")) {
+            n->vqs[i].tx_vq = virtio_add_queue(&n->vdev, 256,
+                                               virtio_net_handle_tx_timer);
+            n->vqs[i].tx_timer = qemu_new_timer_ns(vm_clock,
+                                                   virtio_net_tx_timer,
+&n->vqs[i]);
+            n->vqs[i].tx_timeout = net->txtimer;
+        } else {
+            n->vqs[i].tx_vq = virtio_add_queue(&n->vdev, 256,
+                                               virtio_net_handle_tx_bh);
+            n->vqs[i].tx_bh = qemu_bh_new(virtio_net_tx_bh,&n->vqs[i]);
+        }
+
+        n->vqs[i].tx_waiting = 0;
+        n->vqs[i].n = n;
+
+        if (i == 0) {
+            /* keep compatible with the spec and old guests */
+            n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
+        }
+    }

      n->qdev = dev;
      register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
@@ -1059,24 +1205,33 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
  void virtio_net_exit(VirtIODevice *vdev)
  {
      VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
+    int i;

      /* This will stop vhost backend if appropriate. */
      virtio_net_set_status(vdev, 0);

-    qemu_purge_queued_packets(&n->nic->nc);
+    for (i = 0; i<  n->queues; i++) {
+        qemu_purge_queued_packets(n->nic->ncs[i]);
+    }

      unregister_savevm(n->qdev, "virtio-net", n);

      g_free(n->mac_table.macs);
      g_free(n->vlans);

-    if (n->tx_timer) {
-        qemu_del_timer(n->tx_timer);
-        qemu_free_timer(n->tx_timer);
-    } else {
-        qemu_bh_delete(n->tx_bh);
+    for (i = 0; i<  n->queues; i++) {
+        VirtIONetQueue *netq =&n->vqs[i];
+        if (netq->tx_timer) {
+            qemu_del_timer(netq->tx_timer);
+            qemu_free_timer(netq->tx_timer);
+        } else {
+            qemu_bh_delete(netq->tx_bh);
+        }
      }

-    qemu_del_vlan_client(&n->nic->nc);
      virtio_cleanup(&n->vdev);
+
+    for (i = 0; i<  n->queues; i++) {
+        qemu_del_vlan_client(n->nic->ncs[i]);
+    }
  }
diff --git a/hw/virtio-net.h b/hw/virtio-net.h
index 36aa463..b35ba5d 100644
--- a/hw/virtio-net.h
+++ b/hw/virtio-net.h
@@ -44,6 +44,7 @@
  #define VIRTIO_NET_F_CTRL_RX    18      /* Control channel RX mode support */
  #define VIRTIO_NET_F_CTRL_VLAN  19      /* Control channel VLAN filtering */
  #define VIRTIO_NET_F_CTRL_RX_EXTRA 20   /* Extra RX mode control support */
+#define VIRTIO_NET_F_MULTIQUEUE   22

  #define VIRTIO_NET_S_LINK_UP    1       /* Link is up */

@@ -72,6 +73,8 @@ struct virtio_net_config
      uint8_t mac[ETH_ALEN];
      /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
      uint16_t status;
+
+    uint16_t queues;
  } QEMU_PACKED;

  /* This is the first element of the scatter-gather list.  If you don't
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux