On Tue, 9 Jul 2024 at 20:54, Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx> wrote: > > Add a selftest that tries to trigger a situation where two timer > callbacks are attempting to cancel each other's timer. By running them > continuously, we hit a condition where both run in parallel and cancel > each other. Without the fix in the previous patch, this would cause a > lockup as hrtimer_cancel on either side will wait for forward progress > from the callback. > > Ensure that this situation leads to a EDEADLK error. > > Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx> > --- > .../selftests/bpf/prog_tests/timer_lockup.c | 65 ++++++++++++++ > .../selftests/bpf/progs/timer_lockup.c | 85 +++++++++++++++++++ > 2 files changed, 150 insertions(+) > create mode 100644 tools/testing/selftests/bpf/prog_tests/timer_lockup.c > create mode 100644 tools/testing/selftests/bpf/progs/timer_lockup.c > > diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c > new file mode 100644 > index 000000000000..73e376fc5bbd > --- /dev/null > +++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c > @@ -0,0 +1,65 @@ > +// SPDX-License-Identifier: GPL-2.0 > +#define _GNU_SOURCE > +#include <sched.h> > +#include <test_progs.h> > +#include <pthread.h> > +#include <network_helpers.h> > +#include "timer_lockup.skel.h" > + > +long cpu; > +int *timer1_err; > +int *timer2_err; > + > +static void *timer_lockup_thread(void *arg) > +{ > + LIBBPF_OPTS(bpf_test_run_opts, opts, > + .data_in = &pkt_v4, > + .data_size_in = sizeof(pkt_v4), > + .repeat = 10000, > + ); > + int prog_fd = *(int *)arg; > + cpu_set_t cpuset; > + > + CPU_ZERO(&cpuset); > + CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset); > + ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset), "cpu affinity"); > + > + while (!*timer1_err && !*timer2_err) > + bpf_prog_test_run_opts(prog_fd, &opts); > + > + return NULL; > +} > + > +void 
test_timer_lockup(void) > +{ > + struct timer_lockup *skel; > + pthread_t thrds[2]; > + void *ret; > + > + skel = timer_lockup__open_and_load(); > + if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load")) > + return; > + > + int timer1_prog = bpf_program__fd(skel->progs.timer1_prog); > + int timer2_prog = bpf_program__fd(skel->progs.timer2_prog); > + > + timer1_err = &skel->bss->timer1_err; > + timer2_err = &skel->bss->timer2_err; > + > + if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread, &timer1_prog), "pthread_create thread1")) > + return; > + if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread, &timer2_prog), "pthread_create thread2")) { > + pthread_exit(&thrds[0]); > + return; A "goto out;" to a cleanup label that calls timer_lockup__destroy(skel) is missing here and in the early return above this. Will wait for a day or so before respinning. > [...]