TCP connect hangs for 3 seconds

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Dear all,

we have a very strange problem concerning TCP connections: once in a while a new connect takes exactly 3 seconds to get established. We discovered this problem with connects to a mysql server but it is reproducable with a few lines of C code:

/* START test program */
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <unistd.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <netinet/tcp.h>
#include <netinet/in.h>

main() {

   /* edit Server and Port */
   char *server = "192.168.1.1";
   int port = 3306;

   int sock;
   int len;
   int flag;
   struct sockaddr_in sock_addr;
   int count = 0;
   struct timeval tim;

   while(1) {

      printf("%d\n", count);

      gettimeofday(&tim, NULL);
      double t1=tim.tv_sec+(tim.tv_usec/1000000.0);

      if ((sock = socket(PF_INET, SOCK_STREAM, IPPROTO_IP)) < 0) {
         fprintf(stderr, "ERROR: cannot create socket\n");
         exit(1);
      }

      fcntl(sock, F_SETFL, O_RDONLY);
      fcntl(sock, F_GETFL);

      memset (&sock_addr, 0, sizeof (sock_addr));
      sock_addr.sin_family = AF_INET;
      sock_addr.sin_addr.s_addr = inet_addr (server);
      sock_addr.sin_port = htons (port);
      len = sizeof(sock_addr);

      if (connect(sock, (struct sockaddr *)&sock_addr, len) < 0) {
         fprintf(stderr, "ERROR: cannot connect to %s\n", server);
         exit(1);
      }

      flag = 8;
      setsockopt(sock, SOL_IP, IP_TOS, (char *) &flag, sizeof(int));
      flag = 1;
      setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *) &flag, sizeof(int));
      setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *) &flag, sizeof(int));

      shutdown(sock, SHUT_RDWR);

      close(sock);

      gettimeofday(&tim, NULL);
      double t2=tim.tv_sec+(tim.tv_usec/1000000.0);
      if ((t2-t1) > 1) {
         printf("%s: %.6lf seconds elapsed\n", server, t2-t1);
      }

      count++;

   }

}
/* END test program*/

Just compile the code on a client (first of all edit the IP address and port of the server), open a port on the server (e.g. with netcat: nc -k -l <PORT>) and run the binary against it. The program will terminate with the error message "ERROR: cannot connect to 192.168.1.1" but when you start it again and again you will get the described behavior after some time (message "192.168.1.1: 3.000903 seconds elapsed").

We have traced the client using strace with the following result (first number is the time taken for the system call):

     0.001620 gettimeofday({1202125307, 657853}, NULL) = 0
     0.000132 socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 3
     0.000110 fcntl64(3, F_SETFL, O_RDONLY) = 0
     0.000076 fcntl64(3, F_GETFL)       = 0x2 (flags O_RDWR)
     0.000079 connect(3, {sa_family=AF_INET, sin_port=htons(3306), sin_addr=inet_addr("192.168.1.1")}, 16) = 0
     2.999904 setsockopt(3, SOL_IP, IP_TOS, [8], 4) = 0
     0.000099 setsockopt(3, SOL_TCP, TCP_NODELAY, [1], 4) = 0
     0.000076 setsockopt(3, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0
     0.000072 shutdown(3, 2 /* send and receive */) = 0
     0.000150 close(3)                  = 0
     0.000245 gettimeofday({1202125310, 658756}, NULL) = 0
     0.000089 write(1, "192.168.1.1: 3.000903 seconds e"..., 39) = 39

As you can see the first setsockopt takes 3 seconds. In order to eliminate the setsockopt call as causer of the problem we have made the connect after all setsockopts with the following result:

     0.000225 gettimeofday({1202123697, 552194}, NULL) = 0
     0.000076 socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 3
     0.000110 fcntl64(3, F_SETFL, O_RDONLY) = 0
     0.000077 fcntl64(3, F_GETFL)       = 0x2 (flags O_RDWR)
     0.000088 setsockopt(3, SOL_IP, IP_TOS, [8], 4) = 0
     0.000083 setsockopt(3, SOL_TCP, TCP_NODELAY, [1], 4) = 0
     0.000103 setsockopt(3, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0
     0.000073 connect(3, {sa_family=AF_INET, sin_port=htons(3306), sin_addr=inet_addr("192.168.1.1")}, 16) = 0
     3.000288 shutdown(3, 2 /* send and receive */) = 0
     0.000199 close(3)                  = 0
     0.000236 gettimeofday({1202123700, 553527}, NULL) = 0
     0.000090 write(1, "192.168.1.1: 3.001333 seconds e"..., 39) = 39

It looks like the connect call itself is not the problem rather then the first access to the socket after the connect. Perhaps the "real" connect (TCP SYN paket) is not made until the first access to the socket happens (regardless if this first access is an setsockopt or shutdown).

When you look at the TCP pakets transmitted between server an client you will see that the client sends a SYN and the server answers with a SYN/ACK. However the client will never receive the SYN/ACK. Therefore the client resends a SYN after exactly 3 seconds. The server answers with a SYN/ACK again and this time the client receives the paket. Now the connection is working.

tcpdump from the client:

11:24:44.045735 IP 192.168.1.99.39028 > 192.168.1.1.3306: S 3068401272:3068401272(0) win 5840 <mss 1460,sackOK,timestamp 10592352 0,nop,wscale 6>
11:24:47.045710 IP 192.168.1.99.39028 > 192.168.1.1.3306: S 3068401272:3068401272(0) win 5840 <mss 1460,sackOK,timestamp 10595352 0,nop,wscale 6>
11:24:47.045911 IP 192.168.1.1.3306 > 192.168.1.99.39028: S 3052139931:3052139931(0) ack 3068401273 win 5792 <mss 1460,sackOK,timestamp 1048187 10592352,nop,wscale 7>
11:24:47.046015 IP 192.168.1.99.39028 > 192.168.1.1.3306: . ack 1 win 92 <nop,nop,timestamp 10595352 1048187>
11:24:47.046541 IP 192.168.1.99.39028 > 192.168.1.1.3306: F 1:1(0) ack 1 win 92 <nop,nop,timestamp 10595352 1048187>
11:24:47.046845 IP 192.168.1.1.3306 > 192.168.1.99.39028: F 1:1(0) ack 2 win 46 <nop,nop,timestamp 1048187 10595352>
11:24:47.046880 IP 192.168.1.99.39028 > 192.168.1.1.3306: . ack 2 win 92 <nop,nop,timestamp 10595353 1048187>

tcpdump from the server:

11:24:44.226373 IP 192.168.1.99.39028 > 192.168.1.1.3306: S 3068401272:3068401272(0) win 5840 <mss 1460,sackOK,timestamp 10592352 0,nop,wscal e 6>
11:24:44.226389 IP 192.168.1.1.3306 > 192.168.1.99.39028: S 3052139931:3052139931(0) ack 3068401273 win 5792 <mss 1460,sackOK,timestamp 10478 87 10592352,nop,wscale 7>
11:24:47.226673 IP 192.168.1.99.39028 > 192.168.1.1.3306: S 3068401272:3068401272(0) win 5840 <mss 1460,sackOK,timestamp 10595352 0,nop,wscale 6>
11:24:47.226715 IP 192.168.1.1.3306 > 192.168.1.99.39028: S 3052139931:3052139931(0) ack 3068401273 win 5792 <mss 1460,sackOK,timestamp 1048187 10592352,nop,wscale 7>
11:24:47.226957 IP 192.168.1.99.39028 > 192.168.1.1.3306: . ack 1 win 92 <nop,nop,timestamp 10595352 1048187>
11:24:47.227488 IP 192.168.1.99.39028 > 192.168.1.1.3306: F 1:1(0) ack 1 win 92 <nop,nop,timestamp 10595352 1048187>
11:24:47.227580 IP 192.168.1.1.3306 > 192.168.1.99.39028: F 1:1(0) ack 2 win 46 <nop,nop,timestamp 1048187 10595352>
11:24:47.227810 IP 192.168.1.99.39028 > 192.168.1.1.3306: . ack 2 win 92 <nop,nop,timestamp 10595353 1048187>

The interesting part: when the delay of 3 seconds occurs then every new connection has this problem. So it can't be a missing TCP paket due to a faulty switch or something else. Furthermore the problem can be reproduced also locally on the same machine (-> lo and not ether) without any network stuff (cables, switches, ...). In addition the kernel version seems to be relevant. There are kernels where this problem occures very often (e.g. kernel 2.6.23.8) and versions where it's only sporadically apparent (e.g. kernel 2.6.20.3 or kernel 2.6.23.1). We have tested with standard Fedora kernels as well as with "original" kernels from kernel.org but there seems to be no kernel WITHOUT this behavior (it's a general issue regardless of the kernel version). We have also tested with different hardware (especially different network cards with different drivers -> tg3, e1000) without success.

Has somebody else seen this problem before and can tell us how to fix it? We haven't found anything on the web. Are there any sysctl parameters or something like that? Is it really a kernel problem/bug (seems so) or perhaps a feature?

Any help would be appreciated. At the moment we have nasty hangs in our application due to the "3 second connects".

Kind regards,
Leo

-- 
Psssst! Schon vom neuen GMX MultiMessenger gehört?
Der kann`s mit allen: http://www.gmx.net/de/go/multimessenger
-
To unsubscribe from this list: send the line "unsubscribe linux-net" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Netdev]     [Ethernet Bridging]     [Linux 802.1Q VLAN]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Git]     [Bugtraq]     [Yosemite News and Information]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux PCI]     [Linux Admin]     [Samba]

  Powered by Linux