On 03/11, Andrii Nakryiko wrote: > Switch to non-blocking accept and wait for server thread to exit before > proceeding. I noticed that sometimes tcp_rtt server thread failure would > "spill over" into other tests (that would run after tcp_rtt), probably just > because server thread exits much later and tcp_rtt doesn't wait for it. > > Fixes: 8a03222f508b ("selftests/bpf: test_progs: fix client/server race in tcp_rtt") > Signed-off-by: Andrii Nakryiko <andriin@xxxxxx> > --- > .../selftests/bpf/prog_tests/tcp_rtt.c | 30 +++++++++++-------- > 1 file changed, 18 insertions(+), 12 deletions(-) > > diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c > index f4cd60d6fba2..d235eea0de27 100644 > --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c > +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c > @@ -188,7 +188,7 @@ static int start_server(void) > }; > int fd; > > - fd = socket(AF_INET, SOCK_STREAM, 0); > + fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); > if (fd < 0) { > log_err("Failed to create server socket"); > return -1; > @@ -205,6 +205,7 @@ static int start_server(void) > > static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER; > static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER; > +static volatile bool server_done = false; > > static void *server_thread(void *arg) > { > @@ -222,23 +223,22 @@ static void *server_thread(void *arg) > > if (CHECK_FAIL(err < 0)) { > perror("Failed to listed on socket"); > - return NULL; > + return ERR_PTR(err); > } > > - client_fd = accept(fd, (struct sockaddr *)&addr, &len); > + while (!server_done) { > + client_fd = accept(fd, (struct sockaddr *)&addr, &len); > + if (client_fd == -1 && errno == EAGAIN) > + continue; > + break; > + } > if (CHECK_FAIL(client_fd < 0)) { > perror("Failed to accept client"); > - return NULL; > + return ERR_PTR(err); > } > > - /* Wait for the next connection (that never arrives) > - * to keep this thread alive to prevent calling > - * close() on client_fd. > - */ > - if (CHECK_FAIL(accept(fd, (struct sockaddr *)&addr, &len) >= 0)) { > - perror("Unexpected success in second accept"); > - return NULL; > - } > + while (!server_done) > + usleep(50); > > close(client_fd); > > @@ -249,6 +249,7 @@ void test_tcp_rtt(void) > { > int server_fd, cgroup_fd; > pthread_t tid; > + void *server_res; > > cgroup_fd = test__join_cgroup("/tcp_rtt"); > if (CHECK_FAIL(cgroup_fd < 0)) > @@ -267,6 +268,11 @@ void test_tcp_rtt(void) > pthread_mutex_unlock(&server_started_mtx); > > CHECK_FAIL(run_test(cgroup_fd, server_fd)); > + > + server_done = true; [..] > + pthread_join(tid, &server_res); > + CHECK_FAIL(IS_ERR(server_res)); I wonder if we add (move) close(server_fd) before pthread_join(), can we fix this issue without using non-blocking socket? The accept() should return as soon as server_fd is closed so it's essentially your 'server_done'. > + > close_server_fd: > close(server_fd); > close_cgroup_fd: > -- > 2.17.1 >