On Fri, Feb 28, 2025 at 3:30 PM Martin KaFai Lau <martin.lau@xxxxxxxxx> wrote: > > On 2/27/25 2:23 PM, Amery Hung wrote: > > Traffic monitor thread may see dangling stdout as the main thread closes > > and reassigns stdout without protection. This happens when the main thread > > finishes one subtest and moves to another one in the same netns_new() > > scope. Fix it by first consolidating stdout assignment into > > stdio_restore_cleanup() and then protecting the use/close/reassignment of > > stdout with a lock. The locking in the main thread is always performed > > regradless of whether traffic monitor is running or not for simplicity. > > It won't have any side-effect. > > > > The issue can be reproduced by running test_progs repeatedly with traffic > > monitor enabled: > > > > for ((i=1;i<=100;i++)); do > > ./test_progs -a flow_dissector_skb* -m '*' > > done > > > > Signed-off-by: Amery Hung <ameryhung@xxxxxxxxx> > > --- > > tools/testing/selftests/bpf/network_helpers.c | 8 ++++- > > tools/testing/selftests/bpf/network_helpers.h | 6 ++-- > > tools/testing/selftests/bpf/test_progs.c | 29 +++++++++++++------ > > 3 files changed, 31 insertions(+), 12 deletions(-) > > > > diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c > > index 737a952dcf80..5014fd063d67 100644 > > --- a/tools/testing/selftests/bpf/network_helpers.c > > +++ b/tools/testing/selftests/bpf/network_helpers.c > > @@ -743,6 +743,7 @@ struct tmonitor_ctx { > > pcap_t *pcap; > > pcap_dumper_t *dumper; > > pthread_t thread; > > + pthread_mutex_t *stdout_lock; > > int wake_fd; > > > > volatile bool done; > > @@ -953,6 +954,7 @@ static void *traffic_monitor_thread(void *arg) > > ifindex = ntohl(ifindex); > > ptype = packet[10]; > > > > + pthread_mutex_lock(ctx->stdout_lock); > > if (proto == ETH_P_IPV6) { > > show_ipv6_packet(payload, ifindex, ptype); > > } else if (proto == ETH_P_IP) { > > @@ -967,6 +969,7 @@ static void *traffic_monitor_thread(void *arg) > > printf("%-7s %-3s Unknown network protocol type 0x%x\n", > > ifname, pkt_type_str(ptype), proto); > > } > > + pthread_mutex_unlock(ctx->stdout_lock); > > } > > > > return NULL; > > @@ -1055,7 +1058,8 @@ static void encode_test_name(char *buf, size_t len, const char *test_name, const > > * in the give network namespace. > > */ > > struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name, > > - const char *subtest_name) > > + const char *subtest_name, > > + pthread_mutex_t *stdout_lock) > > Thinking out loud here and see if the following will be better than passing a > pthread_mutex_t. > > How about passing a print function pointer instead and this function can do what > is needed before printf(), i.e. lock mutex here. Something like the > "libbpf_print_fn_t __libbpf_pr" in libbpf.c. > > May be the test_progs.c can set it once by calling tm_set_print (similar to > libbpf_set_print) instead of passing as an arg during every traffic_monitor_start(). > > wdyt? > I think that will be cleaner. Especially explicit pthread_mutext_lock/unlock() can then be removed from traffic monitor. I will change to passing a print function pointer from test_progs.c to traffic monitor. > > { > > struct nstoken *nstoken = NULL; > > struct tmonitor_ctx *ctx; > > @@ -1109,6 +1113,8 @@ struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_n > > goto fail_eventfd; > > } > > > > + ctx->stdout_lock = stdout_lock; > > + > > r = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx); > > if (r) { > > log_err("Failed to create thread"); > > diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h > > index 9f6e05d886c5..b80954eab8d8 100644 > > --- a/tools/testing/selftests/bpf/network_helpers.h > > +++ b/tools/testing/selftests/bpf/network_helpers.h > > @@ -251,11 +251,13 @@ struct tmonitor_ctx; > > > > #ifdef TRAFFIC_MONITOR > > struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name, > > - const char *subtest_name); > > + const char *subtest_name, > > + pthread_mutex_t *stdout_lock); > > void traffic_monitor_stop(struct tmonitor_ctx *ctx); > > #else > > static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name, > > - const char *subtest_name) > > + const char *subtest_name, > > + pthread_mutex_t *stdout_lock) > > { > > return NULL; > > } > > diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c > > index 0cb759632225..db9ea69e8ba1 100644 > > --- a/tools/testing/selftests/bpf/test_progs.c > > +++ b/tools/testing/selftests/bpf/test_progs.c > > @@ -88,7 +88,9 @@ static void stdio_hijack(char **log_buf, size_t *log_cnt) > > #endif > > } > > > > -static void stdio_restore_cleanup(void) > > +static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER; > > + > > +static void stdio_restore_cleanup(bool restore_default) > > { > > #ifdef __GLIBC__ > > if (verbose() && env.worker_id == -1) { > > @@ -98,15 +100,25 @@ static void stdio_restore_cleanup(void) > > > > fflush(stdout); > > > > + pthread_mutex_lock(&stdout_lock); > > + > > if (env.subtest_state) { > > fclose(env.subtest_state->stdout_saved); > > env.subtest_state->stdout_saved = NULL; > > - stdout = env.test_state->stdout_saved; > > - stderr = env.test_state->stdout_saved; > > } else { > > fclose(env.test_state->stdout_saved); > > env.test_state->stdout_saved = NULL; > > } > > + > > + if (restore_default) { > > Why a new "bool restore_default" is needed? Testing env.subtest_state is not enough? > If a crash happens during a subtest, env.subtest_state will still be set and stdout and stderr will not be restored to the default ones. Thanks, Amery > Thanks for debugging this. > > > + stdout = env.stdout_saved; > > + stderr = env.stderr_saved; > > + } else if (env.subtest_state) { > > + stdout = env.test_state->stdout_saved; > > + stderr = env.test_state->stdout_saved; > > + } > > + > > + pthread_mutex_unlock(&stdout_lock); > > #endif > > } > > > > @@ -121,10 +133,7 @@ static void stdio_restore(void) > > if (stdout == env.stdout_saved) > > return; > > > > - stdio_restore_cleanup(); > > - > > - stdout = env.stdout_saved; > > - stderr = env.stderr_saved; > > + stdio_restore_cleanup(true); > > #endif > > } > > > > @@ -541,7 +550,8 @@ void test__end_subtest(void) > > test_result(subtest_state->error_cnt, > > subtest_state->skipped)); > > > > - stdio_restore_cleanup(); > > + stdio_restore_cleanup(false); > > + > > env.subtest_state = NULL; > > } > > > > @@ -779,7 +789,8 @@ struct netns_obj *netns_new(const char *nsname, bool open) > > (env.subtest_state && env.subtest_state->should_tmon)) { > > test_name = env.test->test_name; > > subtest_name = env.subtest_state ? env.subtest_state->name : NULL; > > - netns_obj->tmon = traffic_monitor_start(nsname, test_name, subtest_name); > > + netns_obj->tmon = traffic_monitor_start(nsname, test_name, subtest_name, > > + &stdout_lock); > > if (!netns_obj->tmon) { > > fprintf(stderr, "Failed to start traffic monitor for %s\n", nsname); > > goto fail; >