[RFC V3 2/5] tap: multiqueue support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Some operating system ( such as Linux ) supports multiqueue tap, this is done
through attaching multiple sockets to the net device and expose multiple file
descriptors.

This patch let qemu utilizes this kind of backend, and introduces helpter for:

- creating a multiple capable tap device
- increase and decrease the number of queues by introducing helpter to attach or
  detach a file descriptor to the device

Then qemu can use this as the infrastructures of emulating a multiple queue
capable network cards.

Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx>
---
 net.c             |    4 +
 net/tap-aix.c     |   13 +++-
 net/tap-bsd.c     |   13 +++-
 net/tap-haiku.c   |   13 +++-
 net/tap-linux.c   |   56 +++++++++++++++-
 net/tap-linux.h   |    4 +
 net/tap-solaris.c |   13 +++-
 net/tap-win32.c   |   11 +++
 net/tap.c         |  197 ++++++++++++++++++++++++++++++++++-------------------
 net/tap.h         |    7 ++-
 10 files changed, 253 insertions(+), 78 deletions(-)

diff --git a/net.c b/net.c
index 4aa416c..eabe830 100644
--- a/net.c
+++ b/net.c
@@ -978,6 +978,10 @@ static const struct {
                 .name = "vhostforce",
                 .type = QEMU_OPT_BOOL,
                 .help = "force vhost on for non-MSIX virtio guests",
+            }, {
+                .name = "queues",
+                .type = QEMU_OPT_NUMBER,
+                .help = "number of queues the backend can provides",
         },
 #endif /* _WIN32 */
             { /* end of list */ }
diff --git a/net/tap-aix.c b/net/tap-aix.c
index e19aaba..f111e0f 100644
--- a/net/tap-aix.c
+++ b/net/tap-aix.c
@@ -25,7 +25,8 @@
 #include "net/tap.h"
 #include <stdio.h>
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int attach)
 {
     fprintf(stderr, "no tap on AIX\n");
     return -1;
@@ -59,3 +60,13 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_attach(int fd, const char *ifname)
+{
+    return -1;
+}
+
+int tap_fd_detach(int fd, const char *ifname)
+{
+    return -1;
+}
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 937a94b..44f3421 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -33,7 +33,8 @@
 #include <net/if_tap.h>
 #endif
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int attach)
 {
     int fd;
 #ifdef TAPGIFNAME
@@ -145,3 +146,13 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_attach(int fd, const char *ifname)
+{
+    return -1;
+}
+
+int tap_fd_detach(int fd, const char *ifname)
+{
+    return -1;
+}
diff --git a/net/tap-haiku.c b/net/tap-haiku.c
index 91dda8e..6fb6719 100644
--- a/net/tap-haiku.c
+++ b/net/tap-haiku.c
@@ -25,7 +25,8 @@
 #include "net/tap.h"
 #include <stdio.h>
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int attach)
 {
     fprintf(stderr, "no tap on Haiku\n");
     return -1;
@@ -59,3 +60,13 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_attach(int fd, const char *ifname)
+{
+    return -1;
+}
+
+int tap_fd_detach(int fd, const char *ifname)
+{
+    return -1;
+}
diff --git a/net/tap-linux.c b/net/tap-linux.c
index 41d581b..ed0afe9 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -35,7 +35,8 @@
 
 #define PATH_NET_TUN "/dev/net/tun"
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int attach)
 {
     struct ifreq ifr;
     int fd, ret;
@@ -47,6 +48,8 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required
     }
     memset(&ifr, 0, sizeof(ifr));
     ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+    if (!attach)
+        ifr.ifr_flags |= IFF_MULTI_QUEUE;
 
     if (*vnet_hdr) {
         unsigned int features;
@@ -71,7 +74,11 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required
         pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname);
     else
         pstrcpy(ifr.ifr_name, IFNAMSIZ, "tap%d");
-    ret = ioctl(fd, TUNSETIFF, (void *) &ifr);
+    if (attach) {
+        ifr.ifr_flags |= IFF_ATTACH_QUEUE;
+        ret = ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+    } else
+        ret = ioctl(fd, TUNSETIFF, (void *) &ifr);
     if (ret != 0) {
         if (ifname[0] != '\0') {
             error_report("could not configure %s (%s): %m", PATH_NET_TUN, ifr.ifr_name);
@@ -197,3 +204,48 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
         }
     }
 }
+
+/* Attach a file descriptor to a TUN/TAP device. This descriptor should be
+ * detached before.
+ */
+int tap_fd_attach(int fd, const char *ifname)
+{
+    struct ifreq ifr;
+    int ret;
+
+    memset(&ifr, 0, sizeof(ifr));
+
+    ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR | IFF_ATTACH_QUEUE;
+    pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+    ret = ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+
+    if (ret != 0) {
+        error_report("could not attach to %s", ifname);
+    }
+
+    return ret;
+}
+
+/* Detach a file descriptor to a TUN/TAP device. This file descriptor must have
+ * been attach to a device.
+ */
+int tap_fd_detach(int fd, const char *ifname)
+{
+    struct ifreq ifr;
+    int ret;
+
+    memset(&ifr, 0, sizeof(ifr));
+
+    ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR | IFF_DETACH_QUEUE;
+    pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+    ret = ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+
+    if (ret != 0) {
+        error_report("could not detach to %s", ifname);
+    }
+
+    return ret;
+}
+
diff --git a/net/tap-linux.h b/net/tap-linux.h
index 659e981..648d29f 100644
--- a/net/tap-linux.h
+++ b/net/tap-linux.h
@@ -29,6 +29,7 @@
 #define TUNSETSNDBUF   _IOW('T', 212, int)
 #define TUNGETVNETHDRSZ _IOR('T', 215, int)
 #define TUNSETVNETHDRSZ _IOW('T', 216, int)
+#define TUNSETQUEUE  _IOW('T', 217, int)
 
 #endif
 
@@ -36,6 +37,9 @@
 #define IFF_TAP		0x0002
 #define IFF_NO_PI	0x1000
 #define IFF_VNET_HDR	0x4000
+#define IFF_MULTI_QUEUE 0x0100
+#define IFF_ATTACH_QUEUE 0x0200
+#define IFF_DETACH_QUEUE 0x0400
 
 /* Features for GSO (TUNSETOFFLOAD). */
 #define TUN_F_CSUM	0x01	/* You can hand me unchecksummed packets. */
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index cf76463..f7c8e8d 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -173,7 +173,8 @@ static int tap_alloc(char *dev, size_t dev_size)
     return tap_fd;
 }
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int attach)
 {
     char  dev[10]="";
     int fd;
@@ -225,3 +226,13 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_attach(int fd, const char *ifname)
+{
+    return -1;
+}
+
+int tap_fd_detach(int fd, const char *ifname)
+{
+    return -1;
+}
diff --git a/net/tap-win32.c b/net/tap-win32.c
index a801a55..dae1c00 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -749,3 +749,14 @@ struct vhost_net *tap_get_vhost_net(VLANClientState *nc)
 {
     return NULL;
 }
+
+int tap_attach(VLANClientState *nc)
+{
+    return -1;
+}
+
+int tap_detach(VLANClientState *nc)
+{
+    return -1;
+}
+
diff --git a/net/tap.c b/net/tap.c
index 5ac4ba3..65457e5 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -53,11 +53,13 @@ typedef struct TAPState {
     int fd;
     char down_script[1024];
     char down_script_arg[128];
+    char ifname[128];
     uint8_t buf[TAP_BUFSIZE];
     unsigned int read_poll : 1;
     unsigned int write_poll : 1;
     unsigned int using_vnet_hdr : 1;
     unsigned int has_ufo: 1;
+    unsigned int enabled:1;
     VHostNetState *vhost_net;
     unsigned host_vnet_hdr_len;
 } TAPState;
@@ -248,8 +250,10 @@ void tap_set_vnet_hdr_len(VLANClientState *nc, int len)
     assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
            len == sizeof(struct virtio_net_hdr));
 
-    tap_fd_set_vnet_hdr_len(s->fd, len);
-    s->host_vnet_hdr_len = len;
+    if (s->enabled) {
+        tap_fd_set_vnet_hdr_len(s->fd, len);
+        s->host_vnet_hdr_len = len;
+    }
 }
 
 void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr)
@@ -546,7 +550,7 @@ int net_init_bridge(QemuOpts *opts, const char *name, VLANState *vlan)
     return 0;
 }
 
-static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
+static int net_tap_init(QemuOpts *opts, int *vnet_hdr, int attach)
 {
     int fd, vnet_hdr_required;
     char ifname[128] = {0,};
@@ -563,7 +567,9 @@ static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
         vnet_hdr_required = 0;
     }
 
-    TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
+    TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required,
+                      attach));
+
     if (fd < 0) {
         return -1;
     }
@@ -572,7 +578,7 @@ static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
     if (setup_script &&
         setup_script[0] != '\0' &&
         strcmp(setup_script, "no") != 0 &&
-        launch_script(setup_script, ifname, fd)) {
+        (!attach && launch_script(setup_script, ifname, fd))) {
         close(fd);
         return -1;
     }
@@ -582,74 +588,11 @@ static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
     return fd;
 }
 
-int net_init_tap(QemuOpts *opts, const char *name, VLANState *vlan)
+static int __net_init_tap(QemuOpts *opts, Monitor *mon, const char *name,
+                          VLANState *vlan, int fd, int vnet_hdr)
 {
-    TAPState *s;
-    int fd, vnet_hdr = 0;
-    const char *model;
-
-    if (qemu_opt_get(opts, "fd")) {
-        if (qemu_opt_get(opts, "ifname") ||
-            qemu_opt_get(opts, "script") ||
-            qemu_opt_get(opts, "downscript") ||
-            qemu_opt_get(opts, "vnet_hdr") ||
-            qemu_opt_get(opts, "helper")) {
-            error_report("ifname=, script=, downscript=, vnet_hdr=, "
-                         "and helper= are invalid with fd=");
-            return -1;
-        }
-
-        fd = net_handle_fd_param(cur_mon, qemu_opt_get(opts, "fd"));
-        if (fd == -1) {
-            return -1;
-        }
+    TAPState *s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
 
-        fcntl(fd, F_SETFL, O_NONBLOCK);
-
-        vnet_hdr = tap_probe_vnet_hdr(fd);
-
-        model = "tap";
-
-    } else if (qemu_opt_get(opts, "helper")) {
-        if (qemu_opt_get(opts, "ifname") ||
-            qemu_opt_get(opts, "script") ||
-            qemu_opt_get(opts, "downscript") ||
-            qemu_opt_get(opts, "vnet_hdr")) {
-            error_report("ifname=, script=, downscript=, and vnet_hdr= "
-                         "are invalid with helper=");
-            return -1;
-        }
-
-        fd = net_bridge_run_helper(qemu_opt_get(opts, "helper"),
-                                   DEFAULT_BRIDGE_INTERFACE);
-        if (fd == -1) {
-            return -1;
-        }
-
-        fcntl(fd, F_SETFL, O_NONBLOCK);
-
-        vnet_hdr = tap_probe_vnet_hdr(fd);
-
-        model = "bridge";
-
-    } else {
-        if (!qemu_opt_get(opts, "script")) {
-            qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
-        }
-
-        if (!qemu_opt_get(opts, "downscript")) {
-            qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
-        }
-
-        fd = net_tap_init(opts, &vnet_hdr);
-        if (fd == -1) {
-            return -1;
-        }
-
-        model = "tap";
-    }
-
-    s = net_tap_fd_init(vlan, model, name, fd, vnet_hdr);
     if (!s) {
         close(fd);
         return -1;
@@ -671,6 +614,7 @@ int net_init_tap(QemuOpts *opts, const char *name, VLANState *vlan)
         script     = qemu_opt_get(opts, "script");
         downscript = qemu_opt_get(opts, "downscript");
 
+        pstrcpy(s->ifname, sizeof(s->ifname), ifname);
         snprintf(s->nc.info_str, sizeof(s->nc.info_str),
                  "ifname=%s,script=%s,downscript=%s",
                  ifname, script, downscript);
@@ -704,6 +648,84 @@ int net_init_tap(QemuOpts *opts, const char *name, VLANState *vlan)
         return -1;
     }
 
+    s->enabled = 1;
+    return 0;
+}
+
+int net_init_tap(QemuOpts *opts, const char *name, VLANState *vlan)
+{
+    int i, fd, vnet_hdr = 0;
+    int numqueues = qemu_opt_get_number(opts, "queues", 1);
+
+    if (qemu_opt_get(opts, "fd")) {
+        const char *fdp[16];
+        if (qemu_opt_get(opts, "ifname") ||
+            qemu_opt_get(opts, "script") ||
+            qemu_opt_get(opts, "downscript") ||
+            qemu_opt_get(opts, "vnet_hdr") ||
+            qemu_opt_get(opts, "helper")) {
+            error_report("ifname=, script=, downscript=, vnet_hdr=, "
+                         "and helper= are invalid with fd=");
+            return -1;
+        }
+
+        if (numqueues != qemu_opt_get_all(opts, "fd", fdp, 16)) {
+            error_report("the number of queue does not match the"
+                         "number of fd passed");
+            return -1;
+        }
+
+        for (i = 0; i < numqueues; i++) {
+            fd = net_handle_fd_param(cur_mon, fdp[i]);
+            if (fd == -1) {
+                return -1;
+            }
+
+            fcntl(fd, F_SETFL, O_NONBLOCK);
+
+            vnet_hdr = tap_probe_vnet_hdr(fd);
+
+            if (__net_init_tap(opts, cur_mon, name, vlan, fd, vnet_hdr))
+                return -1;
+        }
+    } else if (qemu_opt_get(opts, "helper")) {
+        if (qemu_opt_get(opts, "ifname") ||
+            qemu_opt_get(opts, "script") ||
+            qemu_opt_get(opts, "downscript") ||
+            qemu_opt_get(opts, "vnet_hdr") ||
+            numqueues > 1) {
+            error_report("ifname=, script=, downscript=, and vnet_hdr=, "
+                         "and queues > 1 are invalid with helper=");
+            return -1;
+        }
+
+        fd = net_bridge_run_helper(qemu_opt_get(opts, "helper"),
+                                   DEFAULT_BRIDGE_INTERFACE);
+        if (fd == -1) {
+            return -1;
+        }
+
+        fcntl(fd, F_SETFL, O_NONBLOCK);
+
+        vnet_hdr = tap_probe_vnet_hdr(fd);
+    } else {
+        if (!qemu_opt_get(opts, "script")) {
+            qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
+        }
+
+        if (!qemu_opt_get(opts, "downscript")) {
+            qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
+        }
+
+        for (i = 0; i < numqueues; i++) {
+            fd = net_tap_init(opts, &vnet_hdr, i != 0);
+            if (fd == -1) {
+                return -1;
+            }
+            if(__net_init_tap(opts, cur_mon, name, vlan, fd, vnet_hdr))
+                return -1;
+        }
+    }
     return 0;
 }
 
@@ -713,3 +735,36 @@ VHostNetState *tap_get_vhost_net(VLANClientState *nc)
     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
     return s->vhost_net;
 }
+
+int tap_attach(VLANClientState *nc)
+{
+    TAPState *s = DO_UPCAST(TAPState, nc, nc);
+    int ret;
+
+    if (s->enabled) {
+        return 0;
+    } else {
+        ret = tap_fd_attach(s->fd, s->ifname);
+        if (ret == 0) {
+            s->enabled = 1;
+        }
+        return ret;
+    }
+}
+
+int tap_detach(VLANClientState *nc)
+{
+    TAPState *s = DO_UPCAST(TAPState, nc, nc);
+    int ret;
+
+    if (s->enabled == 0) {
+        return 0;
+    } else {
+        ret = tap_fd_detach(s->fd, s->ifname);
+        if (ret == 0) {
+            s->enabled = 0;
+        }
+        return ret;
+    }
+}
+
diff --git a/net/tap.h b/net/tap.h
index b2a9450..cead7ca 100644
--- a/net/tap.h
+++ b/net/tap.h
@@ -34,7 +34,8 @@
 
 int net_init_tap(QemuOpts *opts, const char *name, VLANState *vlan);
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required);
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int attach);
 
 ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
 
@@ -51,6 +52,10 @@ int tap_probe_vnet_hdr_len(int fd, int len);
 int tap_probe_has_ufo(int fd);
 void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
 void tap_fd_set_vnet_hdr_len(int fd, int len);
+int tap_attach(VLANClientState *vc);
+int tap_detach(VLANClientState *vc);
+int tap_fd_attach(int fd, const char *ifname);
+int tap_fd_detach(int fd, const char *ifname);
 
 int tap_get_fd(VLANClientState *vc);
 
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux