Hello, we are developing a tool called Spinner to detect locking violations in the bpf subsystem. Spinner reported a nested bpf locking issue with the helper function htab_lru_map_update_elem. htab_lru_map_update_elem calls bpf_common_lru_pop_free, which takes a spin lock. A deadlock will occur if a bpf program calls htab_lru_map_update_elem, takes the spin lock in bpf_common_lru_pop_free, and then triggers nested execution of another bpf program that also calls htab_lru_map_update_elem and tries to take the same lock. This issue was originally reported against kernel version 6.9, but we believe it still exists in the latest kernel. We validated the report on a v6.10 kernel (6.10.0-rc7+) by running a PoC. The lockdep splat is below, and the PoC is attached at the end. Thanks, Priya ============================================ WARNING: possible recursive locking detected 6.10.0-rc7+ #69 Not tainted -------------------------------------------- ping/1186 is trying to acquire lock: ffffe8fffde25c60 (&loc_l->lock){..-.}-{2:2}, at: bpf_lru_pop_free+0x5f3/0x2010 but task is already holding lock: ffffe8fffde25c60 (&loc_l->lock){..-.}-{2:2}, at: bpf_lru_pop_free+0x5f3/0x2010 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&loc_l->lock); lock(&loc_l->lock); *** DEADLOCK *** May be due to missing lock nesting notation 5 locks held by ping/1186: #0: ffff888114d95d68 (sk_lock-AF_INET){+.+.}-{0:0}, at: ping_v4_sendmsg+0xadd/0x16b0 #1: ffffffff85d5cbc0 (rcu_read_lock){....}-{1:3}, at: ip_finish_output2+0x2e3/0x20a0 #2: ffffffff85d5cbc0 (rcu_read_lock){....}-{1:3}, at: process_backlog+0x3a5/0x14d0 #3: ffffe8fffde25c60 (&loc_l->lock){..-.}-{2:2}, at: bpf_lru_pop_free+0x5f3/0x2010 #4: ffffffff85d5cbc0 (rcu_read_lock){....}-{1:3}, at: trace_call_bpf+0xc3/0x810 stack backtrace: CPU: 8 PID: 1186 Comm: ping Not tainted 6.10.0-rc7+ #69 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: <IRQ> dump_stack_lvl+0x9f/0xf0 
dump_stack+0x14/0x20 print_deadlock_bug+0x3ca/0x680 __lock_acquire+0x2ff5/0x6a60 ? __pfx___lock_acquire+0x10/0x10 ? __kasan_check_read+0x15/0x20 ? rb_commit+0xec/0x960 lock_acquire+0x1be/0x560 ? bpf_lru_pop_free+0x5f3/0x2010 ? __kasan_check_read+0x15/0x20 ? __pfx_lock_acquire+0x10/0x10 ? trace_buffer_unlock_commit_regs+0x51/0x4b0 ? trace_event_buffer_commit+0x19c/0xb60 _raw_spin_lock_irqsave+0x55/0xa0 ? bpf_lru_pop_free+0x5f3/0x2010 bpf_lru_pop_free+0x5f3/0x2010 ? __pfx_trace_event_raw_event_bpf_trace_printk+0x10/0x10 ? __pfx_bstr_printf+0x10/0x10 ? __this_cpu_preempt_check+0x17/0x20 prealloc_lru_pop+0x24/0xd0 htab_lru_map_update_elem+0x1a2/0x720 ? __pfx_htab_lru_map_update_elem+0x10/0x10 bpf_prog_2ee6c180efbc46fd_test_prog2+0x5e/0x62 trace_call_bpf+0x24d/0x810 ? __kasan_check_read+0x15/0x20 ? __pfx_trace_call_bpf+0x10/0x10 ? bpf_lru_pop_free+0x353/0x2010 kprobe_perf_func+0x108/0x8c0 ? __pfx_kprobe_perf_func+0x10/0x10 kprobe_dispatcher+0xbc/0x160 opt_pre_handler+0xd7/0x1b0 ? bpf_lru_pop_free+0x353/0x2010 optimized_callback+0x200/0x290 0xffffffffa08a8039 RIP: 0010:bpf_lru_pop_free+0x353/0x2010 Code: 1f 44 00 00 48 8d 41 28 48 89 45 90 81 e3 00 02 00 00 0f 85 dc 02 00 00 48 8b 7d 90 e8 06 79 95 02 85 c0 0f 84 e6 02 00 00 e9 <99> ea fe 1e 00 00 fc ff df 48 8b 55 b0 48 c1 ea 03 80 3c 02 00 0f RSP: 0018:ffff8881f4e09530 EFLAGS: 00000092 RAX: 0000000000000286 RBX: 0000607e09025c20 RCX: 1ffff1103e9d0fac RDX: 1ffffd1fffbc4b91 RSI: ffffffff846be8a0 RDI: ffffffff848fd000 RBP: ffff8881f4e095f8 R08: ffff8881f4e319a0 R09: 1ffffffff0a41db0 R10: ffffffff88096967 R11: 00000000cda74fb5 R12: 0000000000000008 R13: ffff88811366c300 R14: 1ffff1103e9c12cd R15: ffff88811366c01c ? __lock_acquire+0x18e5/0x6a60 ? __kasan_check_read+0x15/0x20 prealloc_lru_pop+0x24/0xd0 htab_lru_map_update_elem+0x1a2/0x720 ? __pfx_htab_lru_map_update_elem+0x10/0x10 bpf_prog_d7a1f3cbb8717020_test_prog1+0x4a/0x62 cls_bpf_classify+0x584/0x13d0 tcf_classify+0x52f/0x1260 tc_run+0x328/0x7f0 ? 
__pfx_tc_run+0x10/0x10 ? debug_smp_processor_id+0x1b/0x30 ? rcu_lockdep_current_cpu_online+0x3f/0x160 __netif_receive_skb_core.constprop.0+0xa3c/0x40c0 ? __pfx_mark_lock+0x10/0x10 ? __pfx___netif_receive_skb_core.constprop.0+0x10/0x10 ? __lock_acquire+0x18e5/0x6a60 __netif_receive_skb_one_core+0xb2/0x1c0 ? __pfx___netif_receive_skb_one_core+0x10/0x10 ? __pfx_lock_release+0x10/0x10 ? mark_held_locks+0xad/0xf0 __netif_receive_skb+0x21/0x150 process_backlog+0x3ec/0x14d0 ? rcu_read_lock_sched_held+0x4b/0x90 ? process_backlog+0x3a5/0x14d0 __napi_poll.constprop.0+0xaa/0x460 net_rx_action+0x52a/0xe00 ? __pfx_net_rx_action+0x10/0x10 ? __pfx_mark_lock+0x10/0x10 ? mark_held_locks+0xad/0xf0 ? handle_softirqs+0x1cb/0x980 handle_softirqs+0x215/0x980 ? __pfx_handle_softirqs+0x10/0x10 ? __dev_queue_xmit+0x87b/0x3cb0 __do_softirq+0x14/0x1a do_softirq.part.0+0xaf/0xf0 </IRQ> <TASK> __local_bh_enable_ip+0x127/0x150 ? __dev_queue_xmit+0x87b/0x3cb0 __dev_queue_xmit+0x890/0x3cb0 ? __lock_acquire+0x18e5/0x6a60 ? __kasan_check_read+0x15/0x20 ? __pfx___dev_queue_xmit+0x10/0x10 ? __lock_acquire+0x18e5/0x6a60 ? __pfx_mark_lock+0x10/0x10 ? check_chain_key+0x1c6/0x540 ? __this_cpu_preempt_check+0x17/0x20 ? ip_finish_output2+0x185a/0x20a0 ip_finish_output2+0xaa3/0x20a0 ? __kasan_check_read+0x15/0x20 ? __pfx_ip_finish_output2+0x10/0x10 ? __pfx_ip_skb_dst_mtu+0x10/0x10 __ip_finish_output+0x16f/0x2c0 ip_finish_output+0x2f/0x270 ip_output+0x17c/0x500 ? __ip_local_out+0x1f8/0x850 ? __pfx_ip_output+0x10/0x10 ? __kasan_check_write+0x18/0x20 ? __pfx_ip_finish_output+0x10/0x10 ? __ip_make_skb+0xe8c/0x2630 ip_local_out+0x24d/0x390 ip_push_pending_frames+0x8a/0x100 ping_v4_sendmsg+0xd9c/0x16b0 ? __pfx_ping_v4_sendmsg+0x10/0x10 ? __this_cpu_preempt_check+0x17/0x20 ? __might_sleep+0xb6/0x170 ? aa_sk_perm+0x26b/0x910 ? __kasan_check_write+0x18/0x20 inet_sendmsg+0xd3/0xf0 ? inet_sendmsg+0xd3/0xf0 __sys_sendto+0x3d4/0x4c0 ? __pfx___sys_sendto+0x10/0x10 ? 
__lock_acquire+0x18e5/0x6a60 __x64_sys_sendto+0xe4/0x1b0 ? __this_cpu_preempt_check+0x17/0x20 ? lockdep_hardirqs_on+0xcf/0x150 x64_sys_call+0x1b4b/0x1f20 do_syscall_64+0x8b/0x140 ? debug_smp_processor_id+0x1b/0x30 ? rcu_is_watching+0x17/0xd0 ? __rseq_handle_notify_resume+0xa24/0xd70 ? __this_cpu_preempt_check+0x17/0x20 ? xfd_validate_state+0x2f/0x160 ? debug_smp_processor_id+0x1b/0x30 ? do_syscall_64+0x97/0x140 ? __this_cpu_preempt_check+0x17/0x20 ? lockdep_hardirqs_on+0xcf/0x150 ? syscall_exit_to_user_mode+0xd5/0x220 ? do_syscall_64+0x97/0x140 ? lockdep_hardirqs_on+0xcf/0x150 ? syscall_exit_to_user_mode+0xd5/0x220 ? do_syscall_64+0x97/0x140 ? lockdep_hardirqs_on+0xcf/0x150 ? syscall_exit_to_user_mode+0xd5/0x220 ? do_syscall_64+0x97/0x140 ? debug_smp_processor_id+0x1b/0x30 ? do_syscall_64+0x97/0x140 ? __this_cpu_preempt_check+0x17/0x20 ? lockdep_hardirqs_on+0xcf/0x150 ? syscall_exit_to_user_mode+0xd5/0x220 ? do_syscall_64+0x97/0x140 ? __this_cpu_preempt_check+0x17/0x20 ? lockdep_hardirqs_on+0xcf/0x150 ? syscall_exit_to_user_mode+0xd5/0x220 ? do_syscall_64+0x97/0x140 ? do_syscall_64+0x97/0x140 ? __this_cpu_preempt_check+0x17/0x20 ? lockdep_hardirqs_on+0xcf/0x150 ? syscall_exit_to_user_mode+0xd5/0x220 ? do_syscall_64+0x97/0x140 ? do_syscall_64+0x97/0x140 ? do_syscall_64+0x97/0x140 ? irqentry_exit+0x6f/0xa0 ? 
exc_page_fault+0x8d/0x110 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7ff57ff27a0a Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89 RSP: 002b:00007ffd51bb0f48 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007ffd51bb25f0 RCX: 00007ff57ff27a0a RDX: 0000000000000040 RSI: 00005596cdeea450 RDI: 0000000000000003 RBP: 00005596cdeea450 R08: 00007ffd51bb4870 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000040 R13: 00007ffd51bb2188 R14: 00007ffd51bb0f50 R15: 00007ffd51bb25f0 </TASK> ========================================== #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_endian.h> char LICENSE[] SEC("license") = "GPL"; struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __uint(max_entries, 1024); __type(key, __u32); __type(value, __u64); } this_map SEC(".maps"); SEC("classifier") int test_prog1(struct __sk_buff *ctx){ __u32 key = 1; __u64 value = 2; bpf_map_update_elem(&this_map, &key, &value, BPF_ANY); bpf_printk("classifier"); return 0; } SEC("kprobe/bpf_lru_pop_free+0x352") int test_prog2(void *ctx){ __u32 key = 1; __u64 value = 2; bpf_printk("kprobe"); bpf_map_update_elem(&this_map, &key, &value, BPF_ANY); return 0; } ========================================= #include <stdio.h> #include <unistd.h> #include <stdlib.h> #include <errno.h> #include <bpf/libbpf.h> #include <bpf/bpf.h> #include <signal.h> #define LO_IFINDEX 1 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) { return vfprintf(stderr, format, args); } static volatile bool exiting = false; static void sig_handler(int sig) { exiting = true; } int main(int argc, char **argv) { int err; libbpf_set_print(libbpf_print_fn); //handling ctrl+c signal(SIGINT, sig_handler); signal(SIGTERM, sig_handler); 
DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = LO_IFINDEX, .attach_point = BPF_TC_INGRESS); DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); bool hook_created = false; const char *obj_file = "recurtest2.bpf.o"; struct bpf_object *obj = bpf_object__open_file(obj_file, NULL); if (!obj) return 1; err = bpf_object__load(obj); if (err) { fprintf(stderr, "Error loading BPF target object\n"); return 1; } struct bpf_program *prog1 = bpf_object__find_program_by_name(obj, "test_prog1"); if (!prog1) { fprintf(stderr, "Error finding BPF program by title\n"); goto cleanup; } err = bpf_tc_hook_create(&tc_hook); if (!err) hook_created = true; if (err && err != -EEXIST) { fprintf(stderr, "Failed to create TC hook: %d\n", err); goto cleanup; } tc_opts.prog_fd = bpf_program__fd(prog1); err = bpf_tc_attach(&tc_hook, &tc_opts); if (err) { fprintf(stderr, "Failed to attach TC: %d\n", err); goto cleanup; } struct bpf_program *prog2 = bpf_object__find_program_by_name(obj, "test_prog2"); if (!prog2) { fprintf(stderr, "Error finding BPF program by title\n"); goto cleanup; } struct bpf_link *link2 = bpf_program__attach(prog2); if (!link2) { fprintf(stderr, "Error attaching kprobe\n"); return 1; } printf("Started successfully"); //for (int i=0; i<10000000; i++) printf(""); while(!exiting) {} bpf_link__destroy(link2); tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0; err = bpf_tc_detach(&tc_hook, &tc_opts); if (err) { fprintf(stderr, "Failed to detach TC: %d\n", err); goto cleanup; } cleanup: if (hook_created) bpf_tc_hook_destroy(&tc_hook); bpf_object__close(obj); return 0; }