Tuning of the effective buffer size through setsockopts was working for SMC traffic only but not for TCP fall-back connections even before commit 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable"). That change made it apparent that TCP fall-back connections would use net.smc.[rw]mem as buffer size instead of net.ipv4_tcp_[rw]mem. Amend the code that copies attributes between the (TCP) clcsock and the SMC socket and adjust buffer sizes appropriately: - Copy over sk_userlocks so that both sockets agree on whether tuning via setsockopt is active. - When falling back to TCP use sk_sndbuf or sk_rcvbuf as specified with setsockopt. Otherwise, use the sysctl value for TCP/IPv4. - Likewise, use either values from setsockopt or from sysctl for SMC (duplicated) on successful SMC connect. In smc_tcp_listen_work() drop the explicit copy of buffer sizes as that is taken care of by the attribute copy.
[...]
+/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */ +static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk, + unsigned long mask) +{ + struct net *nnet = sock_net(nsk); + + nsk->sk_userlocks = osk->sk_userlocks; + if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) { + nsk->sk_sndbuf = osk->sk_sndbuf; + } else { + if (mask == SK_FLAGS_SMC_TO_CLC) + WRITE_ONCE(nsk->sk_sndbuf, + READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
Hi Gerd, I noticed that during TCP connection establishment, tcp_sndbuf_expand() will tune sk->sk_sndbuf, that causes clcsock's sk_sndbuf to no longer be sysctl_tcp_wmem[1]. But here we set it back to sysctl_tcp_wmem[1]. So I did some tests to see if the values of sk_sndbuf and sk_rcvbuf are as expected in SMC and fallback cases (see the attached server.c and client.c for the reproducer and here are the sysctl values in my environment) net.ipv4.tcp_wmem = 4096 4096 16777216 net.ipv4.tcp_rmem = 4096 4096 16777216 net.smc.wmem = 65536 net.smc.rmem = 65536 1. No additional sk_{snd|rcv}buf settings 1.1 TCP ./server ./client -i <serv_ip> results: - server: sndbuf_size 87040, rcvbuf_size 4096 - client: sndbuf_size 87040, rcvbuf_size 4096 1.2 SMC smc_run ./server smc_run ./client -i <serv_ip> results: - server: sndbuf_size 131072, rcvbuf_size 131072 - client: sndbuf_size 131072, rcvbuf_size 131072 1.3 SMC, but server fallback smc_run ./server ./client -i <serv_ip> results: - server: sndbuf_size 87040, rcvbuf_size 4096 - client: sndbuf_size 87040, rcvbuf_size 4096 1.4 SMC, but client fallback ./server smc_run ./client -i <serv_ip> results: - server: sndbuf_size 87040, rcvbuf_size 4096 - client: sndbuf_size 4096, rcvbuf_size 4096 <--- I think clcsock's sk_sndbuf should be the same as 1.1 after fallback? 2. Set server listen sock's and client sock's sk_{snd|rcv}buf as 16KB by setsockopt() before connection establishment. 2.1 TCP ./server -s 16384 ./client -i <serv_ip> -s 16384 results: - server: sndbuf_size 32768, rcvbuf_size 32768 - client: sndbuf_size 32768, rcvbuf_size 32768 2.2 SMC smc_run ./server -s 16384 smc_run ./client -i <serv_ip> -s 16384 results: - server: sndbuf_size 32768, rcvbuf_size 32768 - client: sndbuf_size 32768, rcvbuf_size 32768 2.3 SMC, but server fallback smc_run ./server -s 16384 ./client -i <serv_ip> -s 16384 results: - server: sndbuf_size 32768, rcvbuf_size 32768 - client: sndbuf_size 32768, rcvbuf_size 32768 2.4 SMC, but client fallback ./server -s 16384 smc_run ./client -i <serv_ip> -s 16384 results: - server: sndbuf_size 32768, rcvbuf_size 32768 - client: sndbuf_size 32768, rcvbuf_size 32768 In the above 8 sets of tests, 1.4 does not seem to meet expectations. It is because we reset clcsock's sk_sndbuf to sysctl_tcp_wmem[1] in smc_copy_sock_settings_to_clc(). I think it should be like 1.1 TCP values after fallback. What do you think? If so, we may need to avoid setting sysctl value to clcsock's sk_sndbuf in smc_adjust_sock_bufsizes(). Furthermore, maybe all the setting-sysctl-value can be omitted, since smc sock's and clcsock's sk_{snd|rcv}buf have been set to sysctl value during their sock initialization (smc_sock_alloc() and tcp_init_sock()). And another question is why 1.3 is as expected? The direct cause is that server does not call smc_copy_sock_settings_to_clc() when fallback, like the client smc_connect_fallback() does. But I didn't figure out what is the reason for the different behavior? Do you have any information? Thanks a lot! Best regards, Wen Gu
+ else + WRITE_ONCE(nsk->sk_sndbuf, + 2 * READ_ONCE(nnet->smc.sysctl_wmem)); + } + if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) { + nsk->sk_rcvbuf = osk->sk_rcvbuf; + } else { + if (mask == SK_FLAGS_SMC_TO_CLC) + WRITE_ONCE(nsk->sk_rcvbuf, + READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1])); + else + WRITE_ONCE(nsk->sk_rcvbuf, + 2 * READ_ONCE(nnet->smc.sysctl_rmem)); + } +} +
[...]
#include <stdio.h> #include <string.h> #include <stdlib.h> #include <unistd.h> #include <arpa/inet.h> #include <sys/socket.h> #include <netinet/in.h> #include <stdbool.h> #include <errno.h> #include <netinet/tcp.h> #ifndef AF_SMC #define AF_SMC 43 #endif #define NET_PROTOCAL AF_INET #define SERV_IP "11.213.5.33" #define SERV_PORT 10012 char *ip; int net_clnt(int buf_size, int port) { int sndbuf_size, rcvbuf_size; struct sockaddr_in s_addr; char msg[128] = { 0 }; int optlen = 4; int sock; int rc; if (!port) port = SERV_PORT; sock = socket(NET_PROTOCAL, SOCK_STREAM, 0); if (buf_size) { sndbuf_size = rcvbuf_size = buf_size; /* set sndbuf and rcvbuf */ if (setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int))) { printf("set sndbuf failed\n"); return 0; } if (setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int))) { printf("set rcvbuf failed\n"); return 0; } } memset(&s_addr, 0, sizeof(s_addr)); s_addr.sin_family = NET_PROTOCAL; if (ip) s_addr.sin_addr.s_addr = inet_addr(ip); else s_addr.sin_addr.s_addr = inet_addr(SERV_IP); s_addr.sin_port = htons(port); if (connect(sock, (struct sockaddr*)&s_addr, sizeof(s_addr))){ printf("connect fail\n"); return 0; } sndbuf_size = 0; rcvbuf_size = 0; getsockopt(sock, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, &optlen); getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, &optlen); printf("client: sndbuf_size %d, rcvbuf_size %d\n", sndbuf_size, rcvbuf_size); recv(sock, msg, sizeof(msg), 0); printf("get msg: %s\n", msg); send(sock, "Response", sizeof("Response"), MSG_NOSIGNAL); close(sock); } int main(int argc, char **argv){ bool wrong_param = false; int buf_size = 0, port = 0; int c; while(!wrong_param && (-1 != (c = getopt(argc, argv, "p:s:i:")))) { switch (c) { case 's': buf_size = atoi(optarg); break; case 'i': ip = strdup(optarg); break; case 'p': port = atoi(optarg); break; case '?': printf("usage: ./client -s <bufsize> -i <ip> -p <port>\n"); wrong_param = true; break; } } if (!wrong_param) net_clnt(buf_size, port); return 0; }
#include <stdio.h> #include <string.h> #include <stdlib.h> #include <unistd.h> #include <arpa/inet.h> #include <sys/socket.h> #include <netinet/in.h> #include <errno.h> #include <stdbool.h> #include <netinet/tcp.h> #ifndef AF_SMC #define AF_SMC 43 #endif #define NET_PROTOCAL AF_INET #define SERV_IP "0.0.0.0" #define SERV_PORT 10012 int net_serv(int buf_size, int port) { int sndbuf_size, rcvbuf_size; struct sockaddr_in s_addr; struct sockaddr_in c_addr; char msg[128] = "Request"; int l_sock, s_sock; int optlen = 4; if (!port) port = SERV_PORT; l_sock = socket(NET_PROTOCAL, SOCK_STREAM, 0); if (buf_size) { sndbuf_size = rcvbuf_size = buf_size; /* set sndbuf and rcvbuf */ if (setsockopt(l_sock, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int))) { printf("set sndbuf failed\n"); return 0; } if (setsockopt(l_sock, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int))) { printf("set rcvbuf failed\n", rcvbuf_size); return 0; } } memset(&s_addr, 0, sizeof(struct sockaddr_in)); s_addr.sin_family = NET_PROTOCAL; s_addr.sin_addr.s_addr = inet_addr(SERV_IP); s_addr.sin_port = htons(port); if (bind(l_sock, (struct sockaddr*)&s_addr, sizeof(s_addr))) { printf("bind listen socket error %d\n", errno); return 0; } if (listen(l_sock, 20)) { printf("listen error\n"); return 0; } socklen_t c_addr_len = sizeof(c_addr); s_sock = accept(l_sock, (struct sockaddr*)&c_addr, &c_addr_len); if (s_sock < 0) { printf("accept fail\n"); return 0; } else { char ip[16] = { 0 }; inet_ntop(NET_PROTOCAL, &(c_addr.sin_addr), ip, INET_ADDRSTRLEN); printf("accept connection: ip %s port %d\n", ip, c_addr.sin_port); } getsockopt(s_sock, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, &optlen); getsockopt(s_sock, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, &optlen); printf("server: sndbuf_size %d, rcvbuf_size %d\n", sndbuf_size, rcvbuf_size); send(s_sock, "Request", sizeof("Request"), MSG_NOSIGNAL); recv(s_sock, msg, sizeof(msg), 0); printf("get msg: %s\n", msg); close(s_sock); close(l_sock); return 0; } int main(int argc, char **argv) { bool wrong_param = false; int buf_size = 0, port = 0; int c; while(!wrong_param && (-1 != (c = getopt(argc, argv, "p:s:")))) { switch (c) { case 's': buf_size = atoi(optarg); break; case 'p': port = atoi(optarg); break; case '?': printf("usage: ./server -s <bufsize> -p <port>\n"); wrong_param = true; break; } } if (!wrong_param) net_serv(buf_size, port); return 0; }