Re: [PATCH bpf-next v3 1/1] selftests/bpf: Fix dangling stdout seen by traffic monitor thread

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Feb 28, 2025 at 3:30 PM Martin KaFai Lau <martin.lau@xxxxxxxxx> wrote:
>
> On 2/27/25 2:23 PM, Amery Hung wrote:
> > Traffic monitor thread may see dangling stdout as the main thread closes
> > and reassigns stdout without protection. This happens when the main thread
> > finishes one subtest and moves to another one in the same netns_new()
> > scope. Fix it by first consolidating stdout assignment into
> > stdio_restore_cleanup() and then protecting the use/close/reassignment of
> > stdout with a lock. The locking in the main thread is always performed
> > regradless of whether traffic monitor is running or not for simplicity.
> > It won't have any side-effect.
> >
> > The issue can be reproduced by running test_progs repeatedly with traffic
> > monitor enabled:
> >
> > for ((i=1;i<=100;i++)); do
> >     ./test_progs -a flow_dissector_skb* -m '*'
> > done
> >
> > Signed-off-by: Amery Hung <ameryhung@xxxxxxxxx>
> > ---
> >   tools/testing/selftests/bpf/network_helpers.c |  8 ++++-
> >   tools/testing/selftests/bpf/network_helpers.h |  6 ++--
> >   tools/testing/selftests/bpf/test_progs.c      | 29 +++++++++++++------
> >   3 files changed, 31 insertions(+), 12 deletions(-)
> >
> > diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
> > index 737a952dcf80..5014fd063d67 100644
> > --- a/tools/testing/selftests/bpf/network_helpers.c
> > +++ b/tools/testing/selftests/bpf/network_helpers.c
> > @@ -743,6 +743,7 @@ struct tmonitor_ctx {
> >       pcap_t *pcap;
> >       pcap_dumper_t *dumper;
> >       pthread_t thread;
> > +     pthread_mutex_t *stdout_lock;
> >       int wake_fd;
> >
> >       volatile bool done;
> > @@ -953,6 +954,7 @@ static void *traffic_monitor_thread(void *arg)
> >               ifindex = ntohl(ifindex);
> >               ptype = packet[10];
> >
> > +             pthread_mutex_lock(ctx->stdout_lock);
> >               if (proto == ETH_P_IPV6) {
> >                       show_ipv6_packet(payload, ifindex, ptype);
> >               } else if (proto == ETH_P_IP) {
> > @@ -967,6 +969,7 @@ static void *traffic_monitor_thread(void *arg)
> >                       printf("%-7s %-3s Unknown network protocol type 0x%x\n",
> >                              ifname, pkt_type_str(ptype), proto);
> >               }
> > +             pthread_mutex_unlock(ctx->stdout_lock);
> >       }
> >
> >       return NULL;
> > @@ -1055,7 +1058,8 @@ static void encode_test_name(char *buf, size_t len, const char *test_name, const
> >    * in the give network namespace.
> >    */
> >   struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
> > -                                        const char *subtest_name)
> > +                                        const char *subtest_name,
> > +                                        pthread_mutex_t *stdout_lock)
>
> Thinking out loud here and see if the following will be better than passing a
> pthread_mutex_t.
>
> How about passing a print function pointer instead and this function can do what
> is needed before printf(), i.e. lock mutex here. Something like the
> "libbpf_print_fn_t __libbpf_pr" in libbpf.c.
>
> May be the test_progs.c can set it once by calling tm_set_print (similar to
> libbpf_set_print) instead of passing as an arg during every traffic_monitor_start().
>
> wdyt?
>

I think that will be cleaner. Especially explicit
pthread_mutext_lock/unlock() can then be removed from traffic monitor.
I will change to passing a print function pointer from test_progs.c to
traffic monitor.

> >   {
> >       struct nstoken *nstoken = NULL;
> >       struct tmonitor_ctx *ctx;
> > @@ -1109,6 +1113,8 @@ struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_n
> >               goto fail_eventfd;
> >       }
> >
> > +     ctx->stdout_lock = stdout_lock;
> > +
> >       r = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx);
> >       if (r) {
> >               log_err("Failed to create thread");
> > diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
> > index 9f6e05d886c5..b80954eab8d8 100644
> > --- a/tools/testing/selftests/bpf/network_helpers.h
> > +++ b/tools/testing/selftests/bpf/network_helpers.h
> > @@ -251,11 +251,13 @@ struct tmonitor_ctx;
> >
> >   #ifdef TRAFFIC_MONITOR
> >   struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
> > -                                        const char *subtest_name);
> > +                                        const char *subtest_name,
> > +                                        pthread_mutex_t *stdout_lock);
> >   void traffic_monitor_stop(struct tmonitor_ctx *ctx);
> >   #else
> >   static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
> > -                                                      const char *subtest_name)
> > +                                                      const char *subtest_name,
> > +                                                      pthread_mutex_t *stdout_lock)
> >   {
> >       return NULL;
> >   }
> > diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
> > index 0cb759632225..db9ea69e8ba1 100644
> > --- a/tools/testing/selftests/bpf/test_progs.c
> > +++ b/tools/testing/selftests/bpf/test_progs.c
> > @@ -88,7 +88,9 @@ static void stdio_hijack(char **log_buf, size_t *log_cnt)
> >   #endif
> >   }
> >
> > -static void stdio_restore_cleanup(void)
> > +static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER;
> > +
> > +static void stdio_restore_cleanup(bool restore_default)
> >   {
> >   #ifdef __GLIBC__
> >       if (verbose() && env.worker_id == -1) {
> > @@ -98,15 +100,25 @@ static void stdio_restore_cleanup(void)
> >
> >       fflush(stdout);
> >
> > +     pthread_mutex_lock(&stdout_lock);
> > +
> >       if (env.subtest_state) {
> >               fclose(env.subtest_state->stdout_saved);
> >               env.subtest_state->stdout_saved = NULL;
> > -             stdout = env.test_state->stdout_saved;
> > -             stderr = env.test_state->stdout_saved;
> >       } else {
> >               fclose(env.test_state->stdout_saved);
> >               env.test_state->stdout_saved = NULL;
> >       }
> > +
> > +     if (restore_default) {
>
> Why a new "bool restore_default" is needed? Testing env.subtest_state is not enough?
>

If a crash happens during a subtest, env.subtest_state will still be
set and stdout and stderr will not be restored to the default ones.

Thanks,
Amery

> Thanks for debugging this.
>
> > +             stdout = env.stdout_saved;
> > +             stderr = env.stderr_saved;
> > +     } else if (env.subtest_state) {
> > +             stdout = env.test_state->stdout_saved;
> > +             stderr = env.test_state->stdout_saved;
> > +     }
> > +
> > +     pthread_mutex_unlock(&stdout_lock);
> >   #endif
> >   }
> >
> > @@ -121,10 +133,7 @@ static void stdio_restore(void)
> >       if (stdout == env.stdout_saved)
> >               return;
> >
> > -     stdio_restore_cleanup();
> > -
> > -     stdout = env.stdout_saved;
> > -     stderr = env.stderr_saved;
> > +     stdio_restore_cleanup(true);
> >   #endif
> >   }
> >
> > @@ -541,7 +550,8 @@ void test__end_subtest(void)
> >                                  test_result(subtest_state->error_cnt,
> >                                              subtest_state->skipped));
> >
> > -     stdio_restore_cleanup();
> > +     stdio_restore_cleanup(false);
> > +
> >       env.subtest_state = NULL;
> >   }
> >
> > @@ -779,7 +789,8 @@ struct netns_obj *netns_new(const char *nsname, bool open)
> >           (env.subtest_state && env.subtest_state->should_tmon)) {
> >               test_name = env.test->test_name;
> >               subtest_name = env.subtest_state ? env.subtest_state->name : NULL;
> > -             netns_obj->tmon = traffic_monitor_start(nsname, test_name, subtest_name);
> > +             netns_obj->tmon = traffic_monitor_start(nsname, test_name, subtest_name,
> > +                                                     &stdout_lock);
> >               if (!netns_obj->tmon) {
> >                       fprintf(stderr, "Failed to start traffic monitor for %s\n", nsname);
> >                       goto fail;
>





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux