a On Thu, Dec 29, 2022 at 2:29 PM Yonghong Song <yhs@xxxxxxxx> wrote: > > > > On 12/28/22 2:24 PM, Alexei Starovoitov wrote: > > On Tue, Dec 27, 2022 at 8:43 PM Yonghong Song <yhs@xxxxxxxx> wrote: > >> > >> > >> > >> On 12/18/22 8:15 PM, xiangxia.m.yue@xxxxxxxxx wrote: > >>> From: Tonghao Zhang <xiangxia.m.yue@xxxxxxxxx> > >>> > >>> This test shows how to reproduce a deadlock in a special case. > >>> We update the htab map in Task and NMI context. The task can be interrupted by > >>> an NMI; if the same map bucket is locked, there will be a deadlock. > >>> > >>> * map max_entries is 2. > >>> * NMI uses key 4 and Task context uses key 20. > >>> * so the bucket index is the same but the map_locked index is different. > >>> > >>> The selftest uses perf to produce the NMI and attaches fentry to nmi_handle. > >>> Note that bpf_overflow_handler checks bpf_prog_active, but the bpf update > >>> map syscall increases this counter in bpf_disable_instrumentation. > >>> Then fentry on nmi_handle together with the hash map update will reproduce the issue. 
> >>> > >>> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@xxxxxxxxx> > >>> Cc: Alexei Starovoitov <ast@xxxxxxxxxx> > >>> Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx> > >>> Cc: Andrii Nakryiko <andrii@xxxxxxxxxx> > >>> Cc: Martin KaFai Lau <martin.lau@xxxxxxxxx> > >>> Cc: Song Liu <song@xxxxxxxxxx> > >>> Cc: Yonghong Song <yhs@xxxxxx> > >>> Cc: John Fastabend <john.fastabend@xxxxxxxxx> > >>> Cc: KP Singh <kpsingh@xxxxxxxxxx> > >>> Cc: Stanislav Fomichev <sdf@xxxxxxxxxx> > >>> Cc: Hao Luo <haoluo@xxxxxxxxxx> > >>> Cc: Jiri Olsa <jolsa@xxxxxxxxxx> > >>> Cc: Hou Tao <houtao1@xxxxxxxxxx> > >>> Acked-by: Yonghong Song <yhs@xxxxxx> > >>> --- > >>> tools/testing/selftests/bpf/DENYLIST.aarch64 | 1 + > >>> tools/testing/selftests/bpf/DENYLIST.s390x | 1 + > >>> .../selftests/bpf/prog_tests/htab_deadlock.c | 75 +++++++++++++++++++ > >>> .../selftests/bpf/progs/htab_deadlock.c | 32 ++++++++ > >>> 4 files changed, 109 insertions(+) > >>> create mode 100644 tools/testing/selftests/bpf/prog_tests/htab_deadlock.c > >>> create mode 100644 tools/testing/selftests/bpf/progs/htab_deadlock.c > >>> > >>> diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 > >>> index 99cc33c51eaa..87e8fc9c9df2 100644 > >>> --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 > >>> +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 > >>> @@ -24,6 +24,7 @@ fexit_test # fexit_attach unexpected error > >>> get_func_args_test # get_func_args_test__attach unexpected error: -524 (errno 524) (trampoline) > >>> get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (errno 524) (trampoline) > >>> htab_update/reenter_update > >>> +htab_deadlock # failed to find kernel BTF type ID of 'nmi_handle': -3 (trampoline) > >>> kfree_skb # attach fentry unexpected error: -524 (trampoline) > >>> kfunc_call/subprog # extern (var ksym) 'bpf_prog_active': not found in kernel BTF > >>> kfunc_call/subprog_lskel # skel unexpected error: -2 > >>> diff --git 
a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x > >>> index 585fcf73c731..735239b31050 100644 > >>> --- a/tools/testing/selftests/bpf/DENYLIST.s390x > >>> +++ b/tools/testing/selftests/bpf/DENYLIST.s390x > >>> @@ -26,6 +26,7 @@ get_func_args_test # trampoline > >>> get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (trampoline) > >>> get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) > >>> htab_update # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) > >>> +htab_deadlock # failed to find kernel BTF type ID of 'nmi_handle': -3 (trampoline) > >>> kfree_skb # attach fentry unexpected error: -524 (trampoline) > >>> kfunc_call # 'bpf_prog_active': not found in kernel BTF (?) > >>> kfunc_dynptr_param # JIT does not support calling kernel function (kfunc) > >>> diff --git a/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c b/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c > >>> new file mode 100644 > >>> index 000000000000..137dce8f1346 > >>> --- /dev/null > >>> +++ b/tools/testing/selftests/bpf/prog_tests/htab_deadlock.c > >>> @@ -0,0 +1,75 @@ > >>> +// SPDX-License-Identifier: GPL-2.0 > >>> +/* Copyright (c) 2022 DiDi Global Inc. */ > >>> +#define _GNU_SOURCE > >>> +#include <pthread.h> > >>> +#include <sched.h> > >>> +#include <test_progs.h> > >>> + > >>> +#include "htab_deadlock.skel.h" > >>> + > >>> +static int perf_event_open(void) > >>> +{ > >>> + struct perf_event_attr attr = {0}; > >>> + int pfd; > >>> + > >>> + /* create perf event on CPU 0 */ > >>> + attr.size = sizeof(attr); > >>> + attr.type = PERF_TYPE_HARDWARE; > >>> + attr.config = PERF_COUNT_HW_CPU_CYCLES; > >>> + attr.freq = 1; > >>> + attr.sample_freq = 1000; > >>> + pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); > >>> + > >>> + return pfd >= 0 ? 
pfd : -errno; > >>> +} > >>> + > >>> +void test_htab_deadlock(void) > >>> +{ > >>> + unsigned int val = 0, key = 20; > >>> + struct bpf_link *link = NULL; > >>> + struct htab_deadlock *skel; > >>> + int err, i, pfd; > >>> + cpu_set_t cpus; > >>> + > >>> + skel = htab_deadlock__open_and_load(); > >>> + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) > >>> + return; > >>> + > >>> + err = htab_deadlock__attach(skel); > >>> + if (!ASSERT_OK(err, "skel_attach")) > >>> + goto clean_skel; > >>> + > >>> + /* NMI events. */ > >>> + pfd = perf_event_open(); > >>> + if (pfd < 0) { > >>> + if (pfd == -ENOENT || pfd == -EOPNOTSUPP) { > >>> + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); > >>> + test__skip(); > >>> + goto clean_skel; > >>> + } > >>> + if (!ASSERT_GE(pfd, 0, "perf_event_open")) > >>> + goto clean_skel; > >>> + } > >>> + > >>> + link = bpf_program__attach_perf_event(skel->progs.bpf_empty, pfd); > >>> + if (!ASSERT_OK_PTR(link, "attach_perf_event")) > >>> + goto clean_pfd; > >>> + > >>> + /* Pinned on CPU 0 */ > >>> + CPU_ZERO(&cpus); > >>> + CPU_SET(0, &cpus); > >>> + pthread_setaffinity_np(pthread_self(), sizeof(cpus), &cpus); > >>> + > >>> + /* update bpf map concurrently on CPU0 in NMI and Task context. > >>> + * there should be no kernel deadlock. > >>> + */ > >>> + for (i = 0; i < 100000; i++) > >>> + bpf_map_update_elem(bpf_map__fd(skel->maps.htab), > >>> + &key, &val, BPF_ANY); > >>> + > >>> + bpf_link__destroy(link); > >>> +clean_pfd: > >>> + close(pfd); > >>> +clean_skel: > >>> + htab_deadlock__destroy(skel); > >>> +} > >>> diff --git a/tools/testing/selftests/bpf/progs/htab_deadlock.c b/tools/testing/selftests/bpf/progs/htab_deadlock.c > >>> new file mode 100644 > >>> index 000000000000..d394f95e97c3 > >>> --- /dev/null > >>> +++ b/tools/testing/selftests/bpf/progs/htab_deadlock.c > >>> @@ -0,0 +1,32 @@ > >>> +// SPDX-License-Identifier: GPL-2.0 > >>> +/* Copyright (c) 2022 DiDi Global Inc. 
*/ > >>> +#include <linux/bpf.h> > >>> +#include <bpf/bpf_helpers.h> > >>> +#include <bpf/bpf_tracing.h> > >>> + > >>> +char _license[] SEC("license") = "GPL"; > >>> + > >>> +struct { > >>> + __uint(type, BPF_MAP_TYPE_HASH); > >>> + __uint(max_entries, 2); > >>> + __uint(map_flags, BPF_F_ZERO_SEED); > >>> + __type(key, unsigned int); > >>> + __type(value, unsigned int); > >>> +} htab SEC(".maps"); > >>> + > >>> +/* nmi_handle on x86 platform. If changing keyword > >>> + * "static" to "inline", this prog load failed. */ > >>> +SEC("fentry/nmi_handle") > >> > >> The above comment is not what I meant. In arch/x86/kernel/nmi.c, > >> we have > >> static int nmi_handle(unsigned int type, struct pt_regs *regs) > >> { > >> ... > >> } > >> ... > >> static noinstr void default_do_nmi(struct pt_regs *regs) > >> { > >> ... > >> handled = nmi_handle(NMI_LOCAL, regs); > >> ... > >> } > >> > >> Since nmi_handle is a static function, it is possible that > >> the function might be inlined in default_do_nmi by the > >> compiler. If this happens, fentry/nmi_handle will not > >> be triggered and the test will pass. > >> > >> So I suggest changing the comment to > >> nmi_handle() is a static function and might be > >> inlined into its caller. If this happens, the > >> test can still pass without the previous kernel fix. > > > > It's worse than this. > > fentry is buggy. > > We shouldn't allow attaching fentry to: > > NOKPROBE_SYMBOL(nmi_handle); > > Okay, I see. Looks like we should prevent fentry from > attaching to any NOKPROBE_SYMBOL functions. > > BTW, I think fentry/nmi_handle can be replaced with > tracepoint nmi/nmi_handler. It is more reliable The tracepoint will not reproduce the deadlock (we discussed this in v2). If it's not easy to complete a test for this case, should we drop this testcase patch, or keep the fentry on nmi_handle and update the comments? > and won't be impacted by potential NOKPROBE_SYMBOL > issues. -- Best regards, Tonghao