TCP Throughput Oddity

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




I'm seeing peculiar TCP throughput behavior in Linux 2.4 that I can't
explain.  I wonder if someone can help me with the missing insight.

I have a throughput benchmark that measures the time to send data from
one process to another over TCP.  I have applied it to a pair of
600MHz PIIIs running 2.4.0-test11.  They are connected by 100Mb/s
switched ethernet.

Ordinarily, when I send 8MB in 16 byte block sizes, I get about 52
Mb/s.  However, if I exchange one 4 byte block over the connection (in
each direction) before I start timing the 8MB transfer, it goes up
to about 57 Mb/s.  (By "block size", I mean the size of the buffer
passed to write.)

Does anyone have any idea how the initial small transfer can have such
a large influence on overall throughput?

Below is a copy of `tp', the benchmark program (apologies for its
length), and instructions for using it to reproduce my results.

Thanks,
Vic Zandy


Use tp to measure throughput of 8MB in 16 byte blocks:

1. On host A, start tp as a server (-s) with timing enabled (-t).
   It will print the port number on which it is listening.

    A% tp -s -t
    Server listening on port 1033

2. On host B, start tp as a client.  Specify the server host (-h) and
   port (-p), the total data transfer size (-n), the block size (-z),
   and enable timing (-t).

    B% tp -h A -p 1033 -n 8M -z 16 -t

After the tp client transfers its entire load, each program will print
the observed throughput on its end of the connection.  The client will
exit and the server will wait for a new client.

    A% tp -s -t
    Server listening on port 1033
    52.0 Mbits/sec    986 bytes/read 8388608 bytes total

    B% tp -h A -p 1033 -n 8M -z 16 -t
    16 bytes/write:    1230620 usec,   52.0 Mb/sec

To re-run the measurement with the initial 4 byte exchange, add (-f)
to the client and server tp invocations:

    A% tp -s -t -f
    Server listening on port 1034
    56.9 Mbits/sec   1105 bytes/read 8388608 bytes total

    B% tp -h A -p 1034 -n 8M -z 16 -f -t
    16 bytes/write:    1124185 usec,   56.9 Mb/sec


/* tp.c: gcc -Wall -O2 -o tp tp.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <netdb.h>
#include <sys/time.h>
#include <assert.h>
#include <errno.h>

/* Debug verbosity; incremented once for every -d on the command line.
   When non-zero the transfer loops print progress marks to stdout. */
static int DEBUG = 0;

/* Default size in bytes (8 MB) of the I/O buffer; -z overrides it. */
#define DEFAULT_BUFFER_SIZE   (8*1024*1024)
/* I/O buffer shared by the read and write loops; allocated in main(). */
static unsigned char *buffer;

/* Smaller of x and y.  This is a function-like macro, so an argument
   may be evaluated twice: do not pass expressions with side effects. */
#define MIN(x,y) ((x) < (y) ? (x) : (y))

/*
 * Convert a transfer of BYTES bytes that took the interval *TV into a
 * rate in Mbits/sec, where one Mbit is counted as 1024*1024 bits.
 *
 * All arithmetic is done in double so that neither the bit count
 * (bytes * 8) nor the microsecond count (tv_sec * 1000000) can wrap
 * around in integer arithmetic.
 *
 * Returns 0.0 when the interval is not positive, because no meaningful
 * rate can be computed from it.
 */
static double
mbps(unsigned bytes, struct timeval *tv)
{
	double bits = (double)bytes * 8.0;
	double usec = (double)tv->tv_sec * 1000000.0 + (double)tv->tv_usec;
	double m;

	if (usec <= 0.0)
		return 0.0;
	m = bits / usec;                         /* bits/usec */
	m *= 1000000;                            /* bits/sec  */
	m /= (1024 * 1024);                      /* Mbits/sec */
	return m;
}

/*
 * Look HOSTNAME up with gethostbyname() and return the first address
 * of the result; the value is in network byte order.  If the lookup
 * fails, a message is printed and the process exits with status 1.
 */
static struct in_addr
hostname_to_addr(char *hostname)
{
	struct hostent *entry = gethostbyname(hostname);

	if (entry == NULL) {
		fprintf(stdout, "Host lookup failed: %s\n", hostname);
		exit(1);
	}
	/* h_addr_list[0] is what the traditional h_addr alias names */
	return *(struct in_addr *) entry->h_addr_list[0];
}

/*
 * Read exactly LEN bytes from SD into BUF, restarting any read() that
 * is interrupted by a signal (EINTR).
 *
 * Returns the number of bytes read (LEN) on success, or -1 if read()
 * fails or reports end-of-file before LEN bytes have arrived.
 */
static int
xread(int sd, void *buf, size_t len)
{
	char *dst = (char *)buf;
	size_t got;
	ssize_t n;

	for (got = 0; got < len; ) {
		n = read(sd, dst + got, len - got);
		if (n > 0) {
			got += n;
			continue;
		}
		if (n < 0 && errno == EINTR)
			continue;       /* interrupted: try again */
		return -1;              /* error or premature EOF */
	}
	return got;
}

/*
 * Write all LEN bytes of BUF to SD, continuing after short writes and
 * restarting any write() that is interrupted by a signal (EINTR).
 *
 * Returns the number of bytes written (LEN) on success, or -1 as soon
 * as write() fails for any other reason.
 */
static int
xwrite(int sd, void *buf, size_t len)
{
	const char *src = (const char *)buf;
	size_t left = len;
	ssize_t n;

	while (left > 0) {
		n = write(sd, src, left);
		if (n < 0) {
			if (errno == EINTR)
				continue;       /* interrupted: try again */
			return -1;
		}
		src += n;
		left -= n;
	}
	return len - left;
}

/* Store the difference a - b in *c.  When the microsecond difference
   is negative, one second is borrowed so that it becomes non-negative.
   c may point to the same object as a or b. */
static
void tv_diff(const struct timeval *a,
	     const struct timeval *b,
	     struct timeval *c)
{
	struct timeval d;

	d.tv_sec = a->tv_sec - b->tv_sec;
	d.tv_usec = a->tv_usec - b->tv_usec;
	if (d.tv_usec < 0) {
		d.tv_usec += 1000000;
		d.tv_sec--;
	}
	c->tv_sec = d.tv_sec;
	c->tv_usec = d.tv_usec;
}

/*
 * Read and discard everything that arrives on SOCK until end-of-file,
 * reading up to BUFLEN bytes at a time into the global buffer.
 *
 * If TV is non-null, timing is enabled: the clock starts once select()
 * reports SOCK readable and stops at end-of-file.  The elapsed time is
 * stored in *TV and, if any data was received, one line of statistics
 * (rate, average bytes per read, total bytes) is printed to stdout.
 *
 * Returns -1 if select() fails and 0 otherwise.  A read() failure is
 * reported with perror() but still returns 0, so a caller that treats
 * only negative results as fatal keeps running.
 */
static int
do_discard(int sock, int buflen, struct timeval *tv)
{
	ssize_t rv;
	unsigned nread = 0;     /* total bytes received */
	unsigned loops = 0;     /* number of successful reads */
	struct timeval s;

	if (tv) {
		fd_set fds;
		/* Wait for data to be ready, so that idle time before the
		   first byte is not counted in the measurement */
		FD_ZERO(&fds);
		FD_SET(sock, &fds);
		if (0 > select(sock+1, &fds, NULL, NULL, NULL)) {
			perror("select");
			return -1;
		}
		gettimeofday(&s, NULL);
	}
	for (;;) {
		rv = read(sock, buffer, buflen);
		if (0 > rv) {
			/* Retry interrupted reads, as xread/xwrite do */
			if (errno == EINTR)
				continue;
			perror("read");
			return 0;
		}
		if (0 == rv) {
			if (DEBUG) {
				fprintf(stdout, "EOF\n");
				fflush(stdout);
			}
			break;
		}
		nread += rv;
		loops++;
		if (DEBUG) {
			fprintf(stdout, ".");
			fflush(stdout);
		}
	}
	/* nread > 0 also guarantees loops > 0 for the average below */
	if (tv && nread > 0) {
		gettimeofday(tv, NULL);
		tv_diff(tv, &s, tv);
		/* nread is unsigned, so it is printed with %u */
		printf(" %6.1f Mbits/sec %6.0f bytes/read %u bytes total\n",
		       mbps(nread, tv),
		       (double)nread/loops,
		       nread);
		fflush(stdout);
	}
	return 0;
}

/* Connect to HOST:PORT and return the new socket.  PORT is in host
   byte order.  If TV is non-null, return in TV the wall time for
   establishing the connection (measured around connect() only; the
   host lookup is excluded).  Returns -1 if the socket cannot be
   created or the connection fails; the socket is closed in the latter
   case so no descriptor is leaked. */
static int
do_connect(char *host, short port, struct timeval *tv)
{
	int sd;
	struct sockaddr_in addr;
	struct timeval s, e;
	int rv, ern;

	sd = socket(AF_INET, SOCK_STREAM, 0);
	if (0 > sd) {
		perror("socket");
		return -1;
	}
	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr = hostname_to_addr(host);
	/* Ports above 32767 arrive as negative shorts; the cast
	   recovers the intended 16-bit value */
	addr.sin_port = htons((unsigned short) port);
	if (tv)
		gettimeofday(&s, NULL);
	rv = connect(sd, (struct sockaddr *) &addr, sizeof(addr));
	ern = errno;    /* gettimeofday below must not disturb the error */
	if (tv) {
		gettimeofday(&e, NULL);
		tv_diff(&e, &s, tv);
	}
	if (0 > rv) {
		errno = ern;
		perror("connect");
		close(sd);
		return -1;
	}
	return sd;
}


/* Create a TCP socket, bind it to PORT on any local address, and start
   listening.  PORT is in network order; 0 lets the system choose a
   port.  On success the bound address is stored in *ADDR, the port
   number is announced on stderr, and the listening socket is returned.
   On failure the error is reported with perror(), the socket is
   closed, and -1 is returned. */
static int
do_lb(short port, struct sockaddr_in *addr)
{
	int sd;
	socklen_t len;

	sd = socket(AF_INET, SOCK_STREAM, 0);
	if (0 > sd) {
		perror("socket");
		return -1;
	}

	memset(addr, 0, sizeof(*addr));
	addr->sin_family = AF_INET;
	addr->sin_addr.s_addr = htonl(INADDR_ANY);
	addr->sin_port = port;
	if (0 > bind(sd, (struct sockaddr *) addr, sizeof(*addr))) {
		perror("bind");
		goto fail;
	}
	/* Size of the structure, not of the pointer, so that the whole
	   address (including the chosen port) is returned */
	len = sizeof(*addr);
	if (0 > getsockname(sd, (struct sockaddr *) addr, &len)) {
		perror("getsockname");
		goto fail;
	}
	if (0 > listen(sd, 1)) {
		perror("listen");
		goto fail;
	}

	fprintf(stderr,
		"Server listening on port %d\n", ntohs(addr->sin_port));
	return sd;

 fail:
	close(sd);
	return -1;
}

/*
 * Accept one connection on the listening socket SERV.
 * Returns the connected socket, or -1 after reporting the error with
 * perror() if accept() fails.  The peer address is received but not
 * used.
 */
static int
do_accept(int serv)
{
	int sock;
	struct sockaddr_in addr;
	socklen_t len = sizeof(addr);   /* accept() takes a socklen_t * */

	sock = accept(serv, (struct sockaddr *) &addr, &len);
	if (0 > sock) {
		perror("accept");
		return -1;
	}
	return sock;
}

/*
 * Send N bytes to SOCK from the global buffer, BUFLEN bytes per
 * write (the final write may be shorter).
 *
 * If TV is non-null, the elapsed wall time of the whole transfer is
 * stored in *TV and a line with the write size, the elapsed
 * microseconds and the throughput is printed to stdout.
 *
 * Returns 0 on success, -1 if BUFLEN is not positive or a write fails.
 */
static int
do_one_way_write(int sock, const int buflen, int n, struct timeval *tv)
{
	struct timeval s, e;
	int todo;
	int chunk;

	/* A zero chunk would never make progress and a negative one
	   would become a huge size_t in xwrite() */
	if (buflen <= 0) {
		fprintf(stdout, "Invalid write size %d\n", buflen);
		return -1;
	}
	if (tv)
		gettimeofday(&s, NULL);
	todo = n;
	while (todo > 0) {
		chunk = MIN(buflen, todo);
		if (0 > xwrite(sock, buffer, chunk)) {
			perror("write");
			return -1;
		}
		if (DEBUG) {
			fprintf(stdout, ".");
			fflush(stdout);
		}
		todo -= chunk;
	}
	if (tv) {
		gettimeofday(&e, NULL);
		tv_diff(&e, &s, tv);
		/* Cast to long so the argument matches %ld whatever the
		   widths of time_t and suseconds_t are */
		printf("%10d bytes/write: %10ld usec, %6.1f Mb/sec\n",
		       buflen,
		       (long)tv->tv_sec*1000000 + (long)tv->tv_usec,
		       mbps(n > 0 ? (unsigned)n : 0, tv));
	}
	return 0;
}

/*
 * Print the command-line summary to stdout and terminate the process
 * with exit status E.  Does not return.
 */
static void
usage_and_exit(int e)
{
	static const char *const lines[] = {
		"Usage: sock [switches] [mode]\n",
		" Switches:\n",
		"  -s             Run as a server\n",
		"  -h <host>      (Clients) Server host (default localhost)\n",
		"  -p <port>      (Clients) Server port\n",
		"  -d             Print debugging information\n",
		"  -f             Futz with the connection\n",
		"  -t             Print timing statistics\n",
		"  -n <bytes>     Number of bytes to transfer\n",
		"  -z <bytes>      Set network I/O buffer size\n",
		"  <bytes> may include suffix of `K' or `M'\n",
	};
	size_t i;

	for (i = 0; i < sizeof(lines) / sizeof(lines[0]); i++)
		fputs(lines[i], stdout);
	exit(e);
}

/*
 * Parse a byte count: a decimal number optionally followed by a single
 * `k'/`K' (times 1024) or `m'/`M' (times 1024*1024) suffix, e.g.
 * 100, 100K, 100k, 100M, 100m.
 *
 * Returns the byte count.  Malformed input (no digits, an unknown
 * suffix, or trailing characters) produces a warning on stderr and the
 * value of the leading number without any multiplier (0 if there is
 * none).  A count too large for unsigned long after scaling produces a
 * warning and the largest unsigned long value instead of wrapping.
 */
static unsigned long
parsebytes(char *p)
{
	const unsigned long max = ~0UL;
	unsigned long l, mul;
	char *q;

	l = strtoul(p, &q, 10);
	if (q == p)
		goto err;               /* no digits at all */
	if (*q == '\0')
		return l;
	if (q[1] != '\0')
		goto err;               /* more than one suffix character */
	if (*q == 'k' || *q == 'K')
		mul = 1024;
	else if (*q == 'm' || *q == 'M')
		mul = 1024 * 1024;
	else
		goto err;
	if (l > max / mul) {
		fprintf(stderr, "Byte count %s is too large, using %lu\n",
			p, max);
		return max;
	}
	return l * mul;
 err:
	fprintf(stderr, "Bad byte count specification %s, using %lu\n", p, l);
	return l;
}

/*
 * Exchange one small block with the peer before the timed transfer:
 * write sizeof(unsigned long) bytes to SOCK, then read the same number
 * of bytes back.  Both ends of the connection call this, so each side
 * sends one block and receives one.
 *
 * Returns 0.  Any I/O failure is reported with perror() and terminates
 * the process with status 1.
 *
 * NOTE(review): the block size is sizeof(unsigned long), which differs
 * between 32-bit and 64-bit builds; both ends presumably need the same
 * word size for the exchange to line up -- confirm before mixing.
 */
static int
do_futz(int sock)
{
	/* Initialized so that no indeterminate stack contents are sent */
	unsigned long a = 0;
	int futz_factor = 1;    /* number of blocks sent, then received */
	int i;

	for (i = 0; i < futz_factor; i++) {
		if (0 > xwrite(sock, &a, sizeof(a))) {
			perror("do_futz");
			exit(1);
		}
	}
	for (i = 0; i < futz_factor; i++) {
		if (0 > xread(sock, &a, sizeof(a))) {
			perror("do_futz");
			exit(1);
		}
	}
	return 0;
}


/*
 * Entry point.  Parses the command line, allocates the shared I/O
 * buffer, and then runs either as
 *   - a server (-s): listens on a system-chosen port and, for each
 *     client in turn, optionally performs the -f exchange and then
 *     discards everything the client sends; or
 *   - a client: connects to -h HOST / -p PORT, optionally performs the
 *     -f exchange, waits one second, and sends -n bytes in writes of
 *     -z bytes.
 * Timing statistics are printed only when -t is given.
 * Exits with status 1 on bad usage or on any failure.
 */
int
main(int argc, char *argv[])
{
	/* Largest value of type int, computed without <limits.h> */
	const unsigned long int_max = (unsigned long)(~0U >> 1);
	int c;
	int sock, serv;
	struct timeval tv;
	struct sockaddr_in addr;
	unsigned long val;

	char *host = "localhost";      /* Host for client connections */
	int port = 0;                  /* Port for client connections */
	int n = 64 * 1024 * 1024;      /* Bytes to xfer */
	int sz = DEFAULT_BUFFER_SIZE;  /* Buffer size */
	int server = 0;                /* Server or client? */
	int timing = 0;                /* Collect timing statistics? */
	int futz = 0;                  /* Exchange a small block each way before timing */

	/* optind is left at its initial value: assigning 0 to it is a
	   glibc-only way of resetting getopt and is not needed for a
	   first scan */
	opterr = 1;
	while (-1 != (c = getopt(argc, argv, "fsp:h:dtz:n:")))
		switch (c) {
		case 'f':
			futz = 1;
			break;
		case 's':
			server = 1;
			break;
		case 'p':
			port = atoi(optarg);
			break;
		case 'h':
			host = optarg;
			break;
		case 'd':
			DEBUG++;
			break;
		case 't':
			timing = 1;
			break;
		case 'z':
			/* Same syntax as -n, so K/M suffixes work as the
			   usage text promises */
			val = parsebytes(optarg);
			if (val == 0 || val > int_max) {
				fprintf(stdout, "Bad buffer size: %s\n", optarg);
				usage_and_exit(1);
			}
			sz = (int) val;
			break;
		case 'n':
			val = parsebytes(optarg);
			if (val > int_max) {
				fprintf(stdout, "Bad byte count: %s\n", optarg);
				usage_and_exit(1);
			}
			n = (int) val;
			break;
		default:
			usage_and_exit(1);
			break;
		}
	/* Clients need a host and a valid TCP port */
	if (!server && (!host || port < 1 || port > 65535))
		usage_and_exit(1);
	buffer = (unsigned char*) malloc(sz);
	if (!buffer) {
		fprintf(stdout, "Out of memory.\n");
		exit(1);
	}
	memset(buffer, 0, sz);
	if (server) {
		/* Port 0: let the system pick; do_lb announces the choice */
		serv = do_lb(htons(0), &addr);
		if (0 > serv) {
			fprintf(stdout, "Failed to create listener\n");
			exit(1);
		}
		/* Serve clients one at a time, forever */
		while (1) {
			sock = do_accept(serv);
			if (0 > sock) {
				fprintf(stdout, "Failed to accept client\n");
				exit(1);
			}
			if (futz)
				do_futz(sock);
			if (0 > do_discard(sock, sz, timing ? &tv : NULL)) {
				fprintf(stdout, "Server loop failed\n");
				exit(1);
			}
			close(sock);
		}
	} else {
		sock = do_connect(host, port, timing ? &tv : NULL);
		if (0 > sock) {
			fprintf(stdout, "Client connect failed\n");
			exit(1);
		}
		if (futz)
			do_futz(sock);
		sleep(1); /* Give server time to catch up before timing */
		if (0 > do_one_way_write(sock, sz, n, timing ? &tv : NULL)) {
			fprintf(stdout, "Client loop failed\n");
			exit(1);
		}
		close(sock);
	}
	free(buffer);
	return 0;
}
/* End of tp.c */
-
To unsubscribe from this list: send the line "unsubscribe linux-net" in
the body of a message to majordomo@vger.kernel.org


[Index of Archives]     [Netdev]     [Ethernet Bridging]     [Linux 802.1Q VLAN]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Git]     [Bugtraq]     [Yosemite News and Information]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux PCI]     [Linux Admin]     [Samba]

  Powered by Linux