On Fri, Jan 27, 2023 at 1:16 PM Andrei Gherzan <andrei.gherzan@xxxxxxxxxxxxx> wrote: > > The tx and rx test programs are used in a couple of test scripts including > "udpgro_bench.sh". Taking this as an example, when the rx/tx programs > are invoked subsequently, there is a chance that the rx one is not ready to > accept socket connections. This racing bug could fail the test with at > least one of the following: > > ./udpgso_bench_tx: connect: Connection refused > ./udpgso_bench_tx: sendmsg: Connection refused > ./udpgso_bench_tx: write: Connection refused > > This change addresses this by adding routines that retry the socket > operations with an exponential back off algorithm from 100ms to 2s. > > Fixes: 3a687bef148d ("selftests: udp gso benchmark") > Signed-off-by: Andrei Gherzan <andrei.gherzan@xxxxxxxxxxxxx> Synchronizing the two processes is indeed tricky. Perhaps more robust is opening an initial TCP connection, with SO_RCVTIMEO to bound the waiting time. That covers all tests in one go. > --- > tools/testing/selftests/net/udpgso_bench_tx.c | 57 +++++++++++++------ > 1 file changed, 41 insertions(+), 16 deletions(-) > > diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c > index f1fdaa270291..4dea9ee7eb46 100644 > --- a/tools/testing/selftests/net/udpgso_bench_tx.c > +++ b/tools/testing/selftests/net/udpgso_bench_tx.c > @@ -53,6 +53,9 @@ > > #define NUM_PKT 100 > > +#define MAX_DELAY 2000000 > +#define INIT_DELAY 100000 > + > static bool cfg_cache_trash; > static int cfg_cpu = -1; > static int cfg_connected = true; > @@ -257,13 +260,18 @@ static void flush_errqueue(int fd, const bool do_poll) > static int send_tcp(int fd, char *data) > { > int ret, done = 0, count = 0; > + useconds_t delay = INIT_DELAY; > > while (done < cfg_payload_len) { > - ret = send(fd, data + done, cfg_payload_len - done, > - cfg_zerocopy ? MSG_ZEROCOPY : 0); > - if (ret == -1) > - error(1, errno, "write"); > - > + delay = INIT_DELAY; > + while ((ret = send(fd, data + done, cfg_payload_len - done, > + cfg_zerocopy ? MSG_ZEROCOPY : 0)) == -1) { > + usleep(delay); > + if (delay < MAX_DELAY) > + delay *= 2; > + else > + error(1, errno, "write"); > + } > done += ret; > count++; > } send_tcp should not be affected, as connect will by then already have succeeded. Also, as a reliable protocol it will internally retry, after returning with success. > @@ -274,17 +282,23 @@ static int send_tcp(int fd, char *data) > static int send_udp(int fd, char *data) > { > int ret, total_len, len, count = 0; > + useconds_t delay = INIT_DELAY; > > total_len = cfg_payload_len; > > while (total_len) { > len = total_len < cfg_mss ? total_len : cfg_mss; > > - ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0, > - cfg_connected ? NULL : (void *)&cfg_dst_addr, > - cfg_connected ? 0 : cfg_alen); > - if (ret == -1) > - error(1, errno, "write"); > + delay = INIT_DELAY; > + while ((ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0, > + cfg_connected ? NULL : (void *)&cfg_dst_addr, > + cfg_connected ? 0 : cfg_alen)) == -1) { should ideally only retry on the expected errno. Unreliable datagram sendto will succeed and initially. It will fail with error later, after reception of an ICMP dst unreachable response. > + usleep(delay); > + if (delay < MAX_DELAY) > + delay *= 2; > + else > + error(1, errno, "write"); > + } > if (ret != len) > error(1, errno, "write: %uB != %uB\n", ret, len); > > @@ -378,6 +392,7 @@ static int send_udp_segment(int fd, char *data) > struct iovec iov = {0}; > size_t msg_controllen; > struct cmsghdr *cmsg; > + useconds_t delay = INIT_DELAY; > int ret; > > iov.iov_base = data; > @@ -401,9 +416,13 @@ static int send_udp_segment(int fd, char *data) > msg.msg_name = (void *)&cfg_dst_addr; > msg.msg_namelen = cfg_alen; > > - ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0); > - if (ret == -1) > - error(1, errno, "sendmsg"); > + while ((ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0)) == -1) { > + usleep(delay); > + if (delay < MAX_DELAY) > + delay *= 2; > + else > + error(1, errno, "sendmsg"); > + } > if (ret != iov.iov_len) > error(1, 0, "sendmsg: %u != %llu\n", ret, > (unsigned long long)iov.iov_len); > @@ -616,6 +635,7 @@ int main(int argc, char **argv) > { > unsigned long num_msgs, num_sends; > unsigned long tnow, treport, tstop; > + useconds_t delay = INIT_DELAY; > int fd, i, val, ret; > > parse_opts(argc, argv); > @@ -648,9 +668,14 @@ int main(int argc, char **argv) > } > } > > - if (cfg_connected && > - connect(fd, (void *)&cfg_dst_addr, cfg_alen)) > - error(1, errno, "connect"); > + if (cfg_connected) > + while (connect(fd, (void *)&cfg_dst_addr, cfg_alen)) { > + usleep(delay); > + if (delay < MAX_DELAY) > + delay *= 2; > + else > + error(1, errno, "connect"); > + } > > if (cfg_segment) > set_pmtu_discover(fd, cfg_family == PF_INET); > -- > 2.34.1 >