> -----Original Message----- > From: John Fastabend [mailto:john.fastabend@xxxxxxxxx] > Sent: Tuesday, September 27, 2022 5:16 AM > To: liujian (CE) <liujian56@xxxxxxxxxx>; Cong Wang > <xiyou.wangcong@xxxxxxxxx> > Cc: John Fastabend <john.fastabend@xxxxxxxxx>; Jakub Sitnicki > <jakub@xxxxxxxxxxxxxx>; Eric Dumazet <edumazet@xxxxxxxxxx>; davem > <davem@xxxxxxxxxxxxx>; yoshfuji@xxxxxxxxxxxxxx; dsahern@xxxxxxxxxx; > Jakub Kicinski <kuba@xxxxxxxxxx>; Paolo Abeni <pabeni@xxxxxxxxxx>; > netdev <netdev@xxxxxxxxxxxxxxx>; bpf@xxxxxxxxxxxxxxx > Subject: RE: [bug report] one possible out-of-order issue in sockmap > > liujian (CE) wrote: > > > > > > > -----Original Message----- > > > From: Cong Wang [mailto:xiyou.wangcong@xxxxxxxxx] > > > Sent: Monday, September 26, 2022 2:26 AM > > > To: liujian (CE) <liujian56@xxxxxxxxxx> > > > Cc: John Fastabend <john.fastabend@xxxxxxxxx>; Jakub Sitnicki > > > <jakub@xxxxxxxxxxxxxx>; Eric Dumazet <edumazet@xxxxxxxxxx>; > davem > > > <davem@xxxxxxxxxxxxx>; yoshfuji@xxxxxxxxxxxxxx; dsahern@xxxxxxxxxx; > > > Jakub Kicinski <kuba@xxxxxxxxxx>; Paolo Abeni <pabeni@xxxxxxxxxx>; > > > netdev <netdev@xxxxxxxxxxxxxxx>; bpf@xxxxxxxxxxxxxxx > > > Subject: Re: [bug report] one possible out-of-order issue in sockmap > > > > > > On Sat, Sep 24, 2022 at 07:59:15AM +0000, liujian (CE) wrote: > > > > Hello, > > > > > > > > I had a scp failure problem here. I analyze the code, and the > > > > reasons may > > > be as follows: > > > > > > > > From commit e7a5f1f1cd00 ("bpf/sockmap: Read psock ingress_msg > > > before > > > > sk_receive_queue", if we use sockops > > > > (BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB > > > > and BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) to enable socket's > > > sockmap > > > > function, and don't enable strparse and verdict function, the > > > > out-of-order problem may occur in the following process. 
> > > > > > > > client SK server SK > > > > ------------------------------------------------------------------ > > > > ---- > > > > ---- > > > > tcp_rcv_synsent_state_process > > > > tcp_finish_connect > > > > tcp_init_transfer > > > > tcp_set_state(sk, TCP_ESTABLISHED); > > > > // insert SK to sockmap > > > > wake up waiter > > > > tcp_send_ack > > > > > > > > tcp_bpf_sendmsg(msgA) > > > > // msgA will go tcp stack > > > > tcp_rcv_state_process > > > > tcp_init_transfer > > > > //insert SK to sockmap > > > > tcp_set_state(sk, > > > > TCP_ESTABLISHED) > > > > wake up waiter > > > > > > Here after the socket is inserted to a sockmap, its > > > ->sk_data_ready() is already replaced with > > > sk_psock_verdict_data_ready(), so msgA should go to sockmap, not TCP > stack? > > > > > It is TCP stack. Here I only enable BPF_SK_MSG_VERDICT type. > > bpftool prog load bpf_redir.o /sys/fs/bpf/bpf_redir map name > > sock_ops_map pinned /sys/fs/bpf/sock_ops_map bpftool prog attach > > pinned /sys/fs/bpf/bpf_redir msg_verdict pinned > > /sys/fs/bpf/sock_ops_map > > Is the sender using FAST_OPEN by any chance? We know this bug exists in > this case. Fix tbd. FAST_OPEN is not used. The following test programs can be used to reproduce the out-of-order (OOO) problem. However, for the worst-case scenario described in the report (msgA arriving later than msgB), I have not been able to construct a case that reproduces it reliably. 
tcp_server.c int server_port = 5006; int main(int argc, char *argv[]) { int serverSocket; struct sockaddr_in server_addr; struct sockaddr_in clientAddr; int addr_len = sizeof(clientAddr); int client; char buffer[200]; int iDataNum; int optbuf, ret; if (argc != 2) { return -1; } server_port = atoi(argv[1]); if( server_port<1025 || server_port>65535 ) { return -1; } if((serverSocket = socket(AF_INET, SOCK_STREAM, 0)) < 0) { perror("socket"); return 1; } optbuf = 1; ret = setsockopt(serverSocket, SOL_SOCKET, SO_REUSEADDR, &optbuf, sizeof(int)); if (ret != 0) perror("reuseaddr failed"); bzero(&server_addr, sizeof(server_addr)); server_addr.sin_family = AF_INET; server_addr.sin_port = htons(server_port); server_addr.sin_addr.s_addr = htonl(INADDR_ANY); if(bind(serverSocket, (struct sockaddr *)&server_addr, sizeof(server_addr)) < 0) { perror("connect"); return 1; } if(listen(serverSocket, 5) < 0) { perror("listen"); return 1; } while(1) { client = accept(serverSocket, (struct sockaddr*)&clientAddr, (socklen_t*)&addr_len); if(client < 0) { perror("accept"); continue; } printf("wait until the two msgs of client are sent...\n"); sleep(5); while(1) { printf("recvmsg:"); buffer[0] = '\0'; iDataNum = recv(client, buffer, 1024, 0); if(iDataNum < 0) { perror("recv null"); continue; } buffer[iDataNum] = '\0'; printf("%s\n", buffer); sleep(2); } } close(serverSocket); return 0; } tcp_client.c int server_port = 5006; int main(int argc, char *argv[]) { int clientSocket; struct sockaddr_in serverAddr; struct sockaddr_in clientAddr; char sendbuf[4096]; char recvbuf[4096]; int iDataNum; int ret; int client_port; if (argc != 3) { printf("client [sport] [dport]\n"); return -1; } client_port = atoi(argv[1]); if(client_port<1025 || client_port>65535 ) { return -1; } server_port = atoi(argv[2]); if( server_port<1025 || server_port>65535 ) { return -1; } if((clientSocket = socket(AF_INET, SOCK_STREAM, 0)) < 0) { perror("socket"); return 1; } bzero(&clientAddr, sizeof(clientAddr)); 
clientAddr.sin_family = AF_INET; clientAddr.sin_port = htons(client_port); clientAddr.sin_addr.s_addr = htonl(INADDR_ANY); if(bind(clientSocket, (struct sockaddr *)&clientAddr, sizeof(clientAddr)) < 0) { perror("bind"); return 1; } bzero(&serverAddr, sizeof(serverAddr)); serverAddr.sin_family = AF_INET; serverAddr.sin_port = htons(server_port); serverAddr.sin_addr.s_addr = inet_addr("127.0.0.1"); system("iptables -A INPUT -p tcp -m tcp --dport 5006 --tcp-flags SYN,RST,ACK,FIN ACK -j DROP"); if(connect(clientSocket, (struct sockaddr *)&serverAddr, sizeof(serverAddr)) < 0) { perror("connect"); return 1; } memset(sendbuf, 0, sizeof(sendbuf)); memcpy(sendbuf, "AAAAAAAAAAA", 10); ret = send(clientSocket, sendbuf, strlen(sendbuf), 0); if (ret <= 0) { perror("send fail\n"); return -1; } printf("finish send A\n"); system("iptables -D INPUT -p tcp -m tcp --dport 5006 --tcp-flags SYN,RST,ACK,FIN ACK -j DROP"); sleep(2); // wait serversk insert to sockmap printf("start send b\n"); memcpy(sendbuf, "bbbbbbbbbbbbb", 10); ret = send(clientSocket, sendbuf, strlen(sendbuf), 0); if (ret <= 0) { perror("send fail\n"); return -1; } sleep(10); close(clientSocket); return 0; } [root@localhost sockmap_test]# ./server 5006 wait until the two msgs of client are sent... recvmsg:bbbbbbbbbb recvmsg:AAAAAAAAAA ^C