Hello, We are seeing a peculiar case of net_ratelimit messages with an rt_preempt-patched kernel. The kernel we are using is 3.14.79 with RT patch rt85. Our system is also ramfs-based. The application in question polls a number of devices using non-blocking TCP and has 5 threads. When a device is not present, a connection request sometimes gets an EINPROGRESS error; in that case a select call waits for 9 ms before the socket is closed. When this application runs we see a lot of *net_ratelimit: xxxx callbacks suppressed* messages flooding the screen. Once these net_ratelimit messages start, an unrelated UDP application which broadcasts messages fails to send broadcast packets: the send call fails with EINVAL (as per the man page this can only happen if the size passed to send is incorrect). While this is happening the network interfaces are not accessible (the device does not respond to ping), though the network LEDs blink normally. Attached is a sample application which, if run using the attached script, reproduces the issue. We tried the same case on a Xenomai-based kernel and the issue does not reproduce there; it only seems to happen with the RT patch applied. We tried setting net.core.message_cost=0 to see which message is really being suppressed, but no message shows up in the kernel log. We are not sure what is really happening behind the scenes. Any hints or suggestions on how to investigate this? Thanks in advance, Jens
/*
 * Connection-polling test client.
 *
 * Usage: <program> <number-of-IPs> <wait-period-ms>
 *
 * Endlessly walks the addresses 192.168.171.101 .. 192.168.171.(100 + N),
 * attempts a non-blocking TCP connect to port 502 on each one, waits up to
 * 9 ms for the attempt to finish, closes the socket and then pauses for the
 * requested wait period before moving on to the next address.
 */

/* usleep() is not declared by <unistd.h> under strict -std=c11 without this. */
#define _DEFAULT_SOURCE

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/select.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <unistd.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Wrapped in do/while(0) so each macro behaves as one statement. */
#define CHK_NULL(x) do { if ((x) == NULL) exit(1); } while (0)
#define CHK_ERR(err, s) do { if ((err) == -1) { perror(s); exit(1); } } while (0)
/* Not expanded anywhere in this file; would need the OpenSSL error header. */
#define CHK_SSL(err) do { if ((err) == -1) { ERR_print_errors_fp(stderr); exit(2); } } while (0)

enum {
    TARGET_PORT = 502,        /* TCP port every connect attempt goes to */
    CONNECT_WAIT_USEC = 9000, /* 9 ms wait on select */
    FIRST_HOST = 101,         /* last octet of the first polled address */
    LAST_HOST = 255           /* largest value a last octet can take */
};

/* Not referenced by the functions below; kept so the symbol stays defined. */
int sfd;

/*
 * Report a failed call and terminate.  errno is captured before close()
 * so that perror() describes the original failure, not the close.
 */
static void fail_close(int fd, const char *what)
{
    int saved_errno = errno;

    close(fd);
    errno = saved_errno;
    perror(what);
    exit(1);
}

/*
 * Parse a plain decimal unsigned integer.
 *
 * Returns 0 and stores the value in *out on success; returns -1 when the
 * text is empty, does not start with a digit, has trailing characters or
 * does not fit in an unsigned int.
 */
static int parse_uint(const char *text, unsigned int *out)
{
    if (text == NULL || text[0] < '0' || text[0] > '9') {
        return -1;
    }

    errno = 0;
    char *end = NULL;
    unsigned long value = strtoul(text, &end, 10);

    if (errno == ERANGE || *end != '\0' || value > UINT_MAX) {
        return -1;
    }
    *out = (unsigned int)value;
    return 0;
}

/*
 * Attempt one non-blocking TCP connection to hostname:502.
 *
 * hostname must be a dotted-quad IPv4 address.  The socket is always
 * closed before returning.  "TCP/IP connection error!" is printed when the
 * connection does not complete within 9 ms or completes with an error.
 * Failures of socket/ioctl/connect/select (other than EINPROGRESS from
 * connect) and an unparsable address terminate the process with status 1.
 */
void openProtocol(const char *hostname)
{
    struct sockaddr_in address;

    memset(&address, 0, sizeof(address));
    address.sin_family = AF_INET;
    address.sin_port = htons(TARGET_PORT);
    if (inet_pton(AF_INET, hostname, &address.sin_addr) != 1) {
        fprintf(stderr, "Invalid IPv4 address: %s\n", hostname);
        exit(1);
    }

    int sock = socket(AF_INET, SOCK_STREAM, 0);
    CHK_ERR(sock, "socket");

    /* FD_SET on a descriptor >= FD_SETSIZE is undefined behaviour. */
    if (sock >= FD_SETSIZE) {
        close(sock);
        fprintf(stderr, "socket descriptor %d too large for select\n", sock);
        exit(1);
    }

    int nonblocking = 1;
    if (ioctl(sock, FIONBIO, &nonblocking) == -1) {
        fail_close(sock, "ioctl");
    }

    if (connect(sock, (struct sockaddr *)&address, sizeof(address)) == -1) {
        if (errno != EINPROGRESS) {
            fail_close(sock, "connect");
        }

        /* Connection is in progress: wait for the socket to become writable. */
        fd_set writable;
        FD_ZERO(&writable);
        FD_SET(sock, &writable);

        struct timeval timeout = { .tv_sec = 0, .tv_usec = CONNECT_WAIT_USEC };
        int ready = select(sock + 1, NULL, &writable, NULL, &timeout);

        if (ready == -1) {
            fail_close(sock, "select");
        }

        if (ready == 0) {
            printf("TCP/IP connection error!\n");
        } else {
            /* Writable only means the attempt finished; SO_ERROR says how. */
            int so_error = 0;
            socklen_t len = sizeof(so_error);

            if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &so_error, &len) == -1 ||
                so_error != 0) {
                printf("TCP/IP connection error!\n");
            }
        }
    }

    close(sock);
}

/*
 * Entry point.  argv[1] is the number of consecutive addresses to poll
 * (starting at 192.168.171.101), argv[2] the pause in milliseconds after
 * each attempt.  Loops forever; exits with EXIT_FAILURE on bad arguments.
 */
int main(int argc, char *argv[])
{
    static const char base_addr[] = "192.168.171.";
    const unsigned int max_count = LAST_HOST - FIRST_HOST + 1;

    if (argc < 3) {
        fprintf(stderr, "Please provide number of IP and wait period\n");
        exit(EXIT_FAILURE);
    }

    unsigned int no_of_ip;
    if (parse_uint(argv[1], &no_of_ip) != 0) {
        fprintf(stderr, "Scan of number of device failed\n");
        exit(EXIT_FAILURE);
    }
    /* Zero would spin without ever sleeping; too many would leave the octet range. */
    if (no_of_ip == 0 || no_of_ip > max_count) {
        fprintf(stderr, "Number of IP must be between 1 and %u\n", max_count);
        exit(EXIT_FAILURE);
    }

    unsigned int wait_period;
    if (parse_uint(argv[2], &wait_period) != 0) {
        fprintf(stderr, "Scan of wait period failed\n");
        exit(EXIT_FAILURE);
    }

    /* Split the pause so the usleep() argument stays below one second. */
    const unsigned int pause_sec = wait_period / 1000;
    const unsigned int pause_usec = (wait_period % 1000) * 1000;

    char full_addr[INET_ADDRSTRLEN];

    for (;;) {
        for (unsigned int host = FIRST_HOST; host < FIRST_HOST + no_of_ip; host++) {
            snprintf(full_addr, sizeof(full_addr), "%s%u", base_addr, host);
            openProtocol(full_addr);

            if (pause_sec != 0) {
                sleep(pause_sec);
            }
            usleep(pause_usec);
        }
    }

    return 0;
}
Attachment:
start_sclient.sh
Description: Bourne shell script