[PATCH] kvm tools: Implement virtio net TSO/UFO support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch increases host-to-guest TCP bandwidth from

1060 Mbit/s to 1760 Mbit/s,

and guest-to-host TCP bandwidth from

 342 Mbit/s to  619 Mbit/s.

*************************
Without TSO and UFO
*************************
(guest <- host)
root@sid1:~# iperf -s
------------------------------------------------------------
Server listening on TCP port 5001
TCP window size: 85.3 KByte (default)
------------------------------------------------------------
[  4] local 192.168.33.15 port 5001 connected with 192.168.33.2 port 38733
[ ID] Interval       Transfer     Bandwidth
[  4]  0.0-10.0 sec  1.23 GBytes  1.06 Gbits/sec
^Croot@sid1:~# iperf -s -u
------------------------------------------------------------
Server listening on UDP port 5001
Receiving 1470 byte datagrams
UDP buffer size:   110 KByte (default)
------------------------------------------------------------
[  3] local 192.168.33.15 port 5001 connected with 192.168.33.2 port 54933
[ ID] Interval       Transfer     Bandwidth       Jitter   Lost/Total Datagrams
[  3]  0.0-10.0 sec  1.25 MBytes  1.05 Mbits/sec  0.030 ms    0/  893 (0%)

(guest -> host)
root@sid1:~# iperf -c host
------------------------------------------------------------
Client connecting to host, TCP port 5001
TCP window size: 16.0 KByte (default)
------------------------------------------------------------
[  3] local 192.168.33.15 port 42197 connected with 192.168.33.2 port 5001
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0-10.0 sec    408 MBytes    342 Mbits/sec
root@sid1:~# iperf -c host -u
------------------------------------------------------------
Client connecting to host, UDP port 5001
Sending 1470 byte datagrams
UDP buffer size:   110 KByte (default)
------------------------------------------------------------
[  3] local 192.168.33.15 port 56176 connected with 192.168.33.2 port 5001
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0-10.0 sec  1.25 MBytes  1.05 Mbits/sec
[  3] Sent 893 datagrams
[  3] Server Report:
[  3]  0.0-10.0 sec  1.25 MBytes  1.05 Mbits/sec  0.012 ms    0/  893 (0%)

*************************
With TSO and UFO
*************************

(guest <- host)
root@sid1:~# iperf  -s
------------------------------------------------------------
Server listening on TCP port 5001
TCP window size: 85.3 KByte (default)
------------------------------------------------------------
[  4] local 192.168.33.15 port 5001 connected with 192.168.33.2 port 42767
[ ID] Interval       Transfer     Bandwidth
[  4]  0.0-10.0 sec  2.05 GBytes  1.76 Gbits/sec
root@sid1:~# iperf  -s -u
------------------------------------------------------------
Server listening on UDP port 5001
Receiving 1470 byte datagrams
UDP buffer size:   110 KByte (default)
------------------------------------------------------------
[  3] local 192.168.33.15 port 5001 connected with 192.168.33.2 port 35049
[ ID] Interval       Transfer     Bandwidth       Jitter   Lost/Total Datagrams
[  3]  0.0-10.0 sec  1.25 MBytes  1.05 Mbits/sec  0.031 ms    0/  893 (0%)

(guest -> host)
asias@hj:~$ iperf -s
------------------------------------------------------------
Server listening on TCP port 5001
TCP window size: 85.3 KByte (default)
------------------------------------------------------------
[  4] local 192.168.33.2 port 5001 connected with 192.168.33.15 port 60868
[ ID] Interval       Transfer     Bandwidth
[  4]  0.0-10.0 sec   738 MBytes   619 Mbits/sec
asias@hj:~$ iperf -s -u
------------------------------------------------------------
Server listening on UDP port 5001
Receiving 1470 byte datagrams
UDP buffer size:  112 KByte (default)
------------------------------------------------------------
[  3] local 192.168.33.2 port 5001 connected with 192.168.33.15 port 40602
[ ID] Interval       Transfer     Bandwidth        Jitter   Lost/Total Datagrams
[  3]  0.0-10.0 sec  1.25 MBytes  1.05 Mbits/sec   0.030 ms    0/  893 (0%)

Signed-off-by: Asias He <asias.hejun@xxxxxxxxx>
---
 tools/kvm/virtio-net.c |   52 +++++++++++++++++++++++++++++------------------
 1 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/tools/kvm/virtio-net.c b/tools/kvm/virtio-net.c
index f8d7276..a55c4a4 100644
--- a/tools/kvm/virtio-net.c
+++ b/tools/kvm/virtio-net.c
@@ -57,8 +57,14 @@ static struct net_device net_device = {
 		.mac		= {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
 		.status		= VIRTIO_NET_S_LINK_UP,
 	},
-
-	.host_features		= 1UL << VIRTIO_NET_F_MAC,
+	.host_features		= 1UL << VIRTIO_NET_F_MAC |
+				  1UL << VIRTIO_NET_F_CSUM |
+				  1UL << VIRTIO_NET_F_HOST_UFO |
+				  1UL << VIRTIO_NET_F_HOST_TSO4 |
+				  1UL << VIRTIO_NET_F_HOST_TSO6 |
+				  1UL << VIRTIO_NET_F_GUEST_UFO |
+				  1UL << VIRTIO_NET_F_GUEST_TSO4 |
+				  1UL << VIRTIO_NET_F_GUEST_TSO6,
 };
 
 static void *virtio_net_rx_thread(void *p)
@@ -81,13 +87,8 @@ static void *virtio_net_rx_thread(void *p)
 
 		while (virt_queue__available(vq)) {
 			head = virt_queue__get_iov(vq, iov, &out, &in, self);
-
-			/* We do not specify GSO or CSUM features, So we can ignore virtio_net_hdr */
-			len = readv(net_device.tap_fd, iov + 1, in - 1);
-
-			/* However, We have to tell guest we have write the virtio_net_hdr */
-			virt_queue__set_used_elem(vq, head, sizeof(struct virtio_net_hdr) + len);
-
+			len = readv(net_device.tap_fd, iov, in);
+			virt_queue__set_used_elem(vq, head, len);
 			/* We should interrupt guest right now, otherwise latency is huge. */
 			kvm__irq_line(self, VIRTIO_NET_IRQ, 1);
 		}
@@ -119,7 +120,7 @@ static void *virtio_net_tx_thread(void *p)
 
 		while (virt_queue__available(vq)) {
 			head = virt_queue__get_iov(vq, iov, &out, &in, self);
-			len = writev(net_device.tap_fd, iov + 1, out - 1);
+			len = writev(net_device.tap_fd, iov, out);
 			virt_queue__set_used_elem(vq, head, len);
 		}
 
@@ -280,23 +281,22 @@ static struct pci_device_header virtio_net_pci_device = {
 
 static bool virtio_net__tap_init(const struct virtio_net_parameters *params)
 {
-	struct ifreq ifr;
 	int sock = socket(AF_INET, SOCK_STREAM, 0);
-	int i, pid, status;
+	int i, pid, status, offload, hdr_len;
 	struct sockaddr_in sin = {0};
+	struct ifreq ifr;
 
 	for (i = 0 ; i < 6 ; i++)
 		net_device.net_config.mac[i] = params->guest_mac[i];
 
 	net_device.tap_fd = open("/dev/net/tun", O_RDWR);
 	if (net_device.tap_fd < 0) {
-		warning("Unable to open /dev/net/tun\n");
+		warning("Unable to open /dev/net/tun");
 		goto fail;
 	}
 
 	memset(&ifr, 0, sizeof(ifr));
-	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
-
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
 	if (ioctl(net_device.tap_fd, TUNSETIFF, &ifr) < 0) {
 		warning("Config tap device error. Are you root?");
 		goto fail;
@@ -304,7 +304,22 @@ static bool virtio_net__tap_init(const struct virtio_net_parameters *params)
 
 	strncpy(net_device.tap_name, ifr.ifr_name, sizeof(net_device.tap_name));
 
-	ioctl(net_device.tap_fd, TUNSETNOCSUM, 1);
+	if (ioctl(net_device.tap_fd, TUNSETNOCSUM, 1) < 0) {
+		warning("Config tap device TUNSETNOCSUM error");
+		goto fail;
+	}
+
+	hdr_len = sizeof(struct virtio_net_hdr);
+	if (ioctl(net_device.tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) {
+		warning("Config tap device TUNSETVNETHDRSZ error");
+		goto fail;
+	}
+
+	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
+	if (ioctl(net_device.tap_fd, TUNSETOFFLOAD, offload) < 0) {
+		warning("Config tap device TUNSETOFFLOAD error");
+		goto fail;
+	}
 
 	if (strcmp(params->script, "none")) {
 		pid = fork();
@@ -320,15 +335,12 @@ static bool virtio_net__tap_init(const struct virtio_net_parameters *params)
 		}
 	} else {
 		memset(&ifr, 0, sizeof(ifr));
-
 		strncpy(ifr.ifr_name, net_device.tap_name, sizeof(net_device.tap_name));
-
 		sin.sin_addr.s_addr = inet_addr(params->host_ip);
 		memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
 		ifr.ifr_addr.sa_family = AF_INET;
-
 		if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
-			warning("Can not set ip address on tap device");
+			warning("Could not set ip address on tap device");
 			goto fail;
 		}
 	}
-- 
1.7.4.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux