[PATCH V4 RESEND 15/22] tap: multiqueue support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Recently, linux support multiqueue tap which could let userspace call TUNSETIFF
for a signle device many times to create multiple file descriptors as
independent queues. User could also enable/disabe a specific queue through
TUNSETQUEUE.

The patch adds the generic infrastructure to create multiqueue taps. To achieve
this a new parameter "queues" were introduced to specify how many queues were
expected to be created for tap by qemu itself. Alternatively, management could
also pass multiple pre-created tap file descriptors separated with ':' through a
new parameter fds like -netdev tap,id=hn0,fds="X:Y:..:Z". Multiple vhost file
descriptors could also be passed in this way.

Each TAPState were still associated to a tap fd, which mean multiple TAPStates
were created when user needs multiqueue taps. Since each TAPState contains one
NetClientState, with the multiqueue nic support, an N peers of NetClientState
were built up.

A new parameter, mq_required were introduce in tap_open() to create multiqueue
tap fds.

Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx>
Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx>
---
 include/net/tap.h |    1 -
 net/tap-aix.c     |    3 +-
 net/tap-bsd.c     |    3 +-
 net/tap-haiku.c   |    3 +-
 net/tap-linux.c   |    4 +-
 net/tap-solaris.c |    3 +-
 net/tap.c         |  158 +++++++++++++++++++++++++++++++++++++++++------------
 net/tap_int.h     |    3 +-
 qapi-schema.json  |    5 +-
 9 files changed, 139 insertions(+), 44 deletions(-)

diff --git a/include/net/tap.h b/include/net/tap.h
index c3eb85a..a994f20 100644
--- a/include/net/tap.h
+++ b/include/net/tap.h
@@ -37,7 +37,6 @@ void tap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, int ecn,
 void tap_set_vnet_hdr_len(NetClientState *nc, int len);
 int tap_enable(NetClientState *nc);
 int tap_disable(NetClientState *nc);
-int tap_get_ifname(NetClientState *nc, char *ifname);
 
 int tap_get_fd(NetClientState *nc);
 
diff --git a/net/tap-aix.c b/net/tap-aix.c
index 0e1eac3..3953b60 100644
--- a/net/tap-aix.c
+++ b/net/tap-aix.c
@@ -25,7 +25,8 @@
 #include "tap_int.h"
 #include <stdio.h>
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required)
 {
     fprintf(stderr, "no tap on AIX\n");
     return -1;
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 4f22109..bcdb268 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -33,7 +33,8 @@
 #include <net/if_tap.h>
 #endif
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required)
 {
     int fd;
 #ifdef TAPGIFNAME
diff --git a/net/tap-haiku.c b/net/tap-haiku.c
index b3b5fbb..e5ce436 100644
--- a/net/tap-haiku.c
+++ b/net/tap-haiku.c
@@ -25,7 +25,8 @@
 #include "tap_int.h"
 #include <stdio.h>
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required)
 {
     fprintf(stderr, "no tap on Haiku\n");
     return -1;
diff --git a/net/tap-linux.c b/net/tap-linux.c
index 3b21662..a953189 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -36,12 +36,12 @@
 
 #define PATH_NET_TUN "/dev/net/tun"
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required)
 {
     struct ifreq ifr;
     int fd, ret;
     int len = sizeof(struct virtio_net_hdr);
-    int mq_required = 0;
 
     TFR(fd = open(PATH_NET_TUN, O_RDWR));
     if (fd < 0) {
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index 214d95e..9c7278f 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -173,7 +173,8 @@ static int tap_alloc(char *dev, size_t dev_size)
     return tap_fd;
 }
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required)
 {
     char  dev[10]="";
     int fd;
diff --git a/net/tap.c b/net/tap.c
index 8610ba2..1bf7609 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -558,17 +558,10 @@ int net_init_bridge(const NetClientOptions *opts, const char *name,
 
 static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
                         const char *setup_script, char *ifname,
-                        size_t ifname_sz)
+                        size_t ifname_sz, int mq_required)
 {
     int fd, vnet_hdr_required;
 
-    if (tap->has_ifname) {
-        pstrcpy(ifname, ifname_sz, tap->ifname);
-    } else {
-        assert(ifname_sz > 0);
-        ifname[0] = '\0';
-    }
-
     if (tap->has_vnet_hdr) {
         *vnet_hdr = tap->vnet_hdr;
         vnet_hdr_required = *vnet_hdr;
@@ -577,7 +570,8 @@ static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
         vnet_hdr_required = 0;
     }
 
-    TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required));
+    TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required,
+                      mq_required));
     if (fd < 0) {
         return -1;
     }
@@ -593,6 +587,8 @@ static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
     return fd;
 }
 
+#define MAX_TAP_QUEUES 1024
+
 static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
                             const char *model, const char *name,
                             const char *ifname, const char *script,
@@ -611,17 +607,12 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
         return -1;
     }
 
-    if (tap->has_fd) {
+    if (tap->has_fd || tap->has_fds) {
         snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
     } else if (tap->has_helper) {
         snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s",
                  tap->helper);
     } else {
-        const char *downscript;
-
-        downscript = tap->has_downscript ? tap->downscript :
-            DEFAULT_NETWORK_DOWN_SCRIPT;
-
         snprintf(s->nc.info_str, sizeof(s->nc.info_str),
                  "ifname=%s,script=%s,downscript=%s", ifname, script,
                  downscript);
@@ -652,7 +643,7 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
             error_report("vhost-net requested but could not be initialized");
             return -1;
         }
-    } else if (tap->has_vhostfd) {
+    } else if (tap->has_vhostfd || tap->has_vhostfds) {
         error_report("vhostfd= is not valid without vhost");
         return -1;
     }
@@ -660,27 +651,54 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
     return 0;
 }
 
+static int get_fds(char *str, char *fds[], int max)
+{
+    char *ptr = str, *this;
+    size_t len = strlen(str);
+    int i = 0;
+
+    while (i < max && ptr < str + len) {
+        this = strchr(ptr, ':');
+
+        if (this == NULL) {
+            fds[i] = g_strdup(ptr);
+        } else {
+            fds[i] = g_strndup(ptr, this - ptr);
+        }
+
+        i++;
+        if (this == NULL) {
+            break;
+        } else {
+            ptr = this + 1;
+        }
+    }
+
+    return i;
+}
+
 int net_init_tap(const NetClientOptions *opts, const char *name,
                  NetClientState *peer)
 {
     const NetdevTapOptions *tap;
-
-    int fd, vnet_hdr = 0;
-    const char *model;
-
+    int fd, vnet_hdr = 0, i = 0, queues;
     /* for the no-fd, no-helper case */
     const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */
     const char *downscript = NULL;
+    const char *vhostfdname;
     char ifname[128];
 
     assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP);
     tap = opts->tap;
+    queues = tap->has_queues ? tap->queues : 1;
+    vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL;
 
     if (tap->has_fd) {
         if (tap->has_ifname || tap->has_script || tap->has_downscript ||
-            tap->has_vnet_hdr || tap->has_helper) {
+            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
+            tap->has_fds) {
             error_report("ifname=, script=, downscript=, vnet_hdr=, "
-                         "and helper= are invalid with fd=");
+                         "helper=, queues=, and fds= are invalid with fd=");
             return -1;
         }
 
@@ -693,13 +711,61 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
 
         vnet_hdr = tap_probe_vnet_hdr(fd);
 
-        model = "tap";
+        if (net_init_tap_one(tap, peer, "tap", NULL, NULL,
+                             script, downscript,
+                             vhostfdname, vnet_hdr, fd)) {
+            return -1;
+        }
+    } else if (tap->has_fds) {
+        char *fds[MAX_TAP_QUEUES];
+        char *vhost_fds[MAX_TAP_QUEUES];
+        int nfds, nvhosts;
+
+        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
+            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
+            tap->has_fd) {
+            error_report("ifname=, script=, downscript=, vnet_hdr=, "
+                         "helper=, queues=, and fd= are invalid with fds=");
+            return -1;
+        }
+
+        nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES);
+        if (tap->has_vhostfds) {
+            nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES);
+            if (nfds != nvhosts) {
+                error_report("The number of fds passed does not match the "
+                             "number of vhostfds passed");
+                return -1;
+            }
+        }
+
+        for (i = 0; i < nfds; i++) {
+            fd = monitor_handle_fd_param(cur_mon, fds[i]);
+            if (fd == -1) {
+                return -1;
+            }
+
+            fcntl(fd, F_SETFL, O_NONBLOCK);
 
+            if (i == 0) {
+                vnet_hdr = tap_probe_vnet_hdr(fd);
+            } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) {
+                error_report("vnet_hdr not consistent across given tap fds");
+                return -1;
+            }
+
+            if (net_init_tap_one(tap, peer, "tap", name, ifname,
+                                 script, downscript,
+                                 tap->has_vhostfds ? vhost_fds[i] : NULL,
+                                 vnet_hdr, fd)) {
+                return -1;
+            }
+        }
     } else if (tap->has_helper) {
         if (tap->has_ifname || tap->has_script || tap->has_downscript ||
-            tap->has_vnet_hdr) {
+            tap->has_vnet_hdr || tap->has_queues || tap->has_fds) {
             error_report("ifname=, script=, downscript=, and vnet_hdr= "
-                         "are invalid with helper=");
+                         "queues=, and fds= are invalid with helper=");
             return -1;
         }
 
@@ -709,26 +775,48 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
         }
 
         fcntl(fd, F_SETFL, O_NONBLOCK);
-
         vnet_hdr = tap_probe_vnet_hdr(fd);
 
-        model = "bridge";
-
+        if (net_init_tap_one(tap, peer, "bridge", name, ifname,
+                             script, downscript, vhostfdname,
+                             vnet_hdr, fd)) {
+            return -1;
+        }
     } else {
         script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT;
         downscript = tap->has_downscript ? tap->downscript :
             DEFAULT_NETWORK_DOWN_SCRIPT;
-        fd = net_tap_init(tap, &vnet_hdr, script, ifname, sizeof ifname);
-        if (fd == -1) {
-            return -1;
+
+        if (tap->has_ifname) {
+            pstrcpy(ifname, sizeof ifname, tap->ifname);
+        } else {
+            ifname[0] = '\0';
         }
 
-        model = "tap";
+        for (i = 0; i < queues; i++) {
+            fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
+                              ifname, sizeof ifname, queues > 1);
+            if (fd == -1) {
+                return -1;
+            }
+
+            if (queues > 1 && i == 0 && !tap->has_ifname) {
+                if (tap_fd_get_ifname(fd, ifname)) {
+                    error_report("Fail to get ifname");
+                    return -1;
+                }
+            }
+
+            if (net_init_tap_one(tap, peer, "tap", name, ifname,
+                                 i >= 1 ? "no" : script,
+                                 i >= 1 ? "no" : downscript,
+                                 vhostfdname, vnet_hdr, fd)) {
+                return -1;
+            }
+        }
     }
 
-    return net_init_tap_one(tap, peer, model, name, ifname, script,
-                            downscript, tap->has_vhostfd ? tap->vhostfd : NULL,
-                            vnet_hdr, fd);
+    return 0;
 }
 
 VHostNetState *tap_get_vhost_net(NetClientState *nc)
diff --git a/net/tap_int.h b/net/tap_int.h
index 125f83d..86bb224 100644
--- a/net/tap_int.h
+++ b/net/tap_int.h
@@ -32,7 +32,8 @@
 #define DEFAULT_NETWORK_SCRIPT "/etc/qemu-ifup"
 #define DEFAULT_NETWORK_DOWN_SCRIPT "/etc/qemu-ifdown"
 
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required);
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required);
 
 ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
 
diff --git a/qapi-schema.json b/qapi-schema.json
index 3a4817b..cdd8384 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2533,6 +2533,7 @@
   'data': {
     '*ifname':     'str',
     '*fd':         'str',
+    '*fds':        'str',
     '*script':     'str',
     '*downscript': 'str',
     '*helper':     'str',
@@ -2540,7 +2541,9 @@
     '*vnet_hdr':   'bool',
     '*vhost':      'bool',
     '*vhostfd':    'str',
-    '*vhostforce': 'bool' } }
+    '*vhostfds':   'str',
+    '*vhostforce': 'bool',
+    '*queues':     'uint32'} }
 
 ##
 # @NetdevSocketOptions
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux