> On Jan 10, 2023, at 9:33 AM, Martin KaFai Lau <martin.lau@xxxxxxxxx> wrote: > > On 1/5/23 1:26 AM, tong@xxxxxxxxxxxxx wrote: >> diff --git a/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c b/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c >> new file mode 100644 >> index 000000000000..137dce8f1346 >> --- /dev/null >> +++ b/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c >> @@ -0,0 +1,75 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* Copyright (c) 2022 DiDi Global Inc. */ >> +#define _GNU_SOURCE >> +#include <pthread.h> >> +#include <sched.h> >> +#include <test_progs.h> >> + >> +#include "htab_deadlock.skel.h" >> + >> +static int perf_event_open(void) >> +{ >> + struct perf_event_attr attr = {0}; >> + int pfd; >> + >> + /* create perf event on CPU 0 */ >> + attr.size = sizeof(attr); >> + attr.type = PERF_TYPE_HARDWARE; >> + attr.config = PERF_COUNT_HW_CPU_CYCLES; >> + attr.freq = 1; >> + attr.sample_freq = 1000; >> + pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); >> + >> + return pfd >= 0 ? pfd : -errno; >> +} >> + >> +void test_htab_deadlock(void) >> +{ >> + unsigned int val = 0, key = 20; >> + struct bpf_link *link = NULL; >> + struct htab_deadlock *skel; >> + int err, i, pfd; >> + cpu_set_t cpus; >> + >> + skel = htab_deadlock__open_and_load(); >> + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) >> + return; >> + >> + err = htab_deadlock__attach(skel); >> + if (!ASSERT_OK(err, "skel_attach")) >> + goto clean_skel; >> + >> + /* NMI events. */ >> + pfd = perf_event_open(); >> + if (pfd < 0) { >> + if (pfd == -ENOENT || pfd == -EOPNOTSUPP) { >> + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); >> + test__skip(); > > This test is a SKIP in bpf CI, so it won't be useful. > https://github.com/kernel-patches/bpf/actions/runs/3858084722/jobs/6579470256#step:6:5198 > > Is there other way to test it or do you know what may be missing in vmtest.sh? Not sure if the cloud setup in CI blocks HW_CPU_CYCLES. 
If it is, I also don't know a good way (Cc: Manu). Hi, other test cases using PERF_COUNT_HW_CPU_CYCLES were skipped too, for example: send_signal, find_vma, and get_stackid_cannot_attach. > >> + goto clean_skel; >> + } >> + if (!ASSERT_GE(pfd, 0, "perf_event_open")) >> + goto clean_skel; >> + } >> + >> + link = bpf_program__attach_perf_event(skel->progs.bpf_empty, pfd); >> + if (!ASSERT_OK_PTR(link, "attach_perf_event")) >> + goto clean_pfd; >> + >> + /* Pinned on CPU 0 */ >> + CPU_ZERO(&cpus); >> + CPU_SET(0, &cpus); >> + pthread_setaffinity_np(pthread_self(), sizeof(cpus), &cpus); >> + >> + /* update bpf map concurrently on CPU0 in NMI and Task context. >> + * there should be no kernel deadlock. >> + */ >> + for (i = 0; i < 100000; i++) >> + bpf_map_update_elem(bpf_map__fd(skel->maps.htab), >> + &key, &val, BPF_ANY); >> + >> + bpf_link__destroy(link); >> +clean_pfd: >> + close(pfd); >> +clean_skel: >> + htab_deadlock__destroy(skel); >> +} >> diff --git a/tools/testing/selftests/bpf/progs/htab_deadlock.c b/tools/testing/selftests/bpf/progs/htab_deadlock.c >> new file mode 100644 >> index 000000000000..dacd003b1ccb >> --- /dev/null >> +++ b/tools/testing/selftests/bpf/progs/htab_deadlock.c >> @@ -0,0 +1,30 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* Copyright (c) 2022 DiDi Global Inc. */ >> +#include <linux/bpf.h> >> +#include <bpf/bpf_helpers.h> >> +#include <bpf/bpf_tracing.h> >> + >> +char _license[] SEC("license") = "GPL"; >> + >> +struct { >> + __uint(type, BPF_MAP_TYPE_HASH); >> + __uint(max_entries, 2); >> + __uint(map_flags, BPF_F_ZERO_SEED); >> + __type(key, unsigned int); >> + __type(value, unsigned int); >> +} htab SEC(".maps"); >> + >> +SEC("fentry/perf_event_overflow") >> +int bpf_nmi_handle(struct pt_regs *regs) >> +{ >> + unsigned int val = 0, key = 4; >> + >> + bpf_map_update_elem(&htab, &key, &val, BPF_ANY); > > I ran it in my qemu setup which does not skip the test. 
I got this splat though: This is a false alarm, not a deadlock (this patch only fixes the deadlock). I fixed the warning in another patch; please review: https://patchwork.kernel.org/project/netdevbpf/patch/20230105112749.38421-1-tong@xxxxxxxxxxxxx/ > > [ 42.990306] ================================ > [ 42.990307] WARNING: inconsistent lock state > [ 42.990310] 6.2.0-rc2-00304-gaf88a1bb9967 #409 Tainted: G O > [ 42.990313] -------------------------------- > [ 42.990315] inconsistent {INITIAL USE} -> {IN-NMI} usage. > [ 42.990317] test_progs/1546 [HC1[1]:SC0[0]:HE0:SE1] takes: > [ 42.990322] ffff888101245768 (&htab->lockdep_key){....}-{2:2}, at: htab_map_update_elem+0x1e7/0x810 > [ 42.990340] {INITIAL USE} state was registered at: > [ 42.990341] lock_acquire+0x1e6/0x530 > [ 42.990351] _raw_spin_lock_irqsave+0xb8/0x100 > [ 42.990362] htab_map_update_elem+0x1e7/0x810 > [ 42.990365] bpf_map_update_value+0x40d/0x4f0 > [ 42.990371] map_update_elem+0x423/0x580 > [ 42.990375] __sys_bpf+0x54e/0x670 > [ 42.990377] __x64_sys_bpf+0x7c/0x90 > [ 42.990382] do_syscall_64+0x43/0x90 > [ 42.990387] entry_SYSCALL_64_after_hwframe+0x72/0xdc > > Please check. > >> + return 0; >> +} >> + >> +SEC("perf_event") >> +int bpf_empty(struct pt_regs *regs) >> +{ > > btw, from a quick look at __perf_event_overflow, I suspect doing the bpf_map_update_elem() here instead of the fentry/perf_event_overflow above can also reproduce the patch 1 issue? No. bpf_overflow_handler checks bpf_prog_active; if the syscall has increased it, bpf_overflow_handler will skip the bpf prog. Fentry does not check bpf_prog_active and can interrupt the task context. We have discussed that. > >> + return 0; >> +} > >