Hello everyone, I am running some tests with BPF SK_SKB programs and have encountered a scenario that results in a kernel panic. I use a BPF_SK_SKB_STREAM_PARSER program to parse a request that may span multiple TCP segments. If the parser program detects the end of the request, it returns skb->len, passing the request to the BPF_SK_SKB_STREAM_VERDICT program; otherwise it returns 0 and waits for more data to be received. You can find the BPF program attached (bpf_test.c). Is there an assumption that the program violates? To reproduce the crash, I use the attached Python script (client.py), which sends data in chunks to the BPF program. Usually, the kernel crashes on the 3rd segment. To provide more information, I have attached some crash logs. I have tested this on kernel version 6.1.0 (slightly modified) and version 6.2.8 (unmodified, obtained from kernel.org). It seems that the panic happens when invoking bpf_skb_pull_data. Is this a known issue, or is there any information I can provide to help resolve it? Sincerely, Farbod Shahinfar PhD student at Politecnico di Milano https://fshahinfar1.github.io/
kernel BUG at net/core/skbuff.c:2446! [ 1549.847914] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 1549.853139] CPU: 10 PID: 2008 Comm: python3 Tainted: G O 6.2.8 #4 [ 1549.860615] Hardware name: Dell Inc. PowerEdge C6525/04DK47, BIOS 2.0.3 01/15/2021 [ 1549.868181] RIP: 0010:__pskb_pull_tail+0x45c/0x4b0 [ 1549.872973] Code: 24 08 be 02 00 00 00 e8 42 cb ff ff 48 83 c4 10 31 c0 5b 41 5c 41 5d 41 5e 41 5f 5d e9 41 a0 26 00 48 8d 78 ff e9 42 fd ff ff <0f> 0b 48 c7 44 24 08 00 00 00 00 e9 9f fe ff ff 48 c7 44 24 08 00 [ 1549.891711] RSP: 0018:ffffad1fc05f8998 EFLAGS: 00010282 [ 1549.896939] RAX: 00000000fffffff2 RBX: 0000000000000001 RCX: 00000000000002c0 [ 1549.904070] RDX: ffff992f00ffe15b RSI: ffff992f00ffe000 RDI: ffff992f168adf00 [ 1549.911196] RBP: ffffad1fc05f89d0 R08: 00000000ffffff1c R09: ffff992f168ad300 [ 1549.918329] R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000001 [ 1549.925460] R13: ffff992f168adf00 R14: ffff992f00fff048 R15: ffff992f168adf00 [ 1549.932585] FS: 00007fd4a5e55740(0000) GS:ffff994dfee80000(0000) knlGS:0000000000000000 [ 1549.940670] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1549.946409] CR2: 0000000000453531 CR3: 000000010a688000 CR4: 0000000000350ee0 [ 1549.953542] Call Trace: [ 1549.955997] <IRQ> [ 1549.958017] skb_ensure_writable+0x8f/0xa0 [ 1549.962114] sk_skb_pull_data+0x24/0x30 [ 1549.965953] bpf_prog_5c4f327a1e142419_parser+0x1f/0xd8 [ 1549.971179] sk_psock_strp_parse+0x50/0xc0 [ 1549.975279] __strp_recv+0x1fb/0x630 [ 1549.978859] strp_recv+0x27/0x30 [ 1549.982092] tcp_read_sock+0x91/0x1c0 [ 1549.985757] ? __pfx_strp_recv+0x10/0x10 [ 1549.989683] strp_read_sock+0x68/0xb0 [ 1549.993348] strp_data_ready+0x61/0xa0 [ 1549.997101] sk_psock_strp_data_ready+0x86/0xa0 [ 1550.001635] tcp_data_ready+0x33/0xe0 [ 1550.005301] tcp_data_queue+0x469/0xd40 [ 1550.009142] tcp_rcv_established+0x264/0x730 [ 1550.013411] ? 
sk_filter_trim_cap+0x114/0x250 [ 1550.017772] tcp_v4_do_rcv+0x15d/0x2b0 [ 1550.021524] tcp_v4_rcv+0x11f1/0x1260 [ 1550.025192] ip_protocol_deliver_rcu+0x3b/0x330 [ 1550.029724] ip_local_deliver_finish+0x8a/0xb0 [ 1550.034171] ip_local_deliver+0x73/0x120 [ 1550.038095] ? __pfx_ip_local_deliver_finish+0x10/0x10 [ 1550.043234] ip_rcv_finish+0xc2/0xd0 [ 1550.046816] ip_rcv+0x57/0xf0 [ 1550.049787] ? probe_sched_wakeup+0x39/0x40 [ 1550.053975] __netif_receive_skb_one_core+0x8c/0xa0 [ 1550.058854] __netif_receive_skb+0x15/0x60 [ 1550.062953] process_backlog+0xa8/0x140 [ 1550.066793] __napi_poll+0x31/0x1d0 [ 1550.070286] net_rx_action+0x290/0x2e0 [ 1550.074037] __do_softirq+0xf5/0x2d7 [ 1550.077618] do_softirq+0x9a/0xc0 [ 1550.080935] </IRQ> [ 1550.083032] <TASK> [ 1550.085129] __local_bh_enable_ip+0x7d/0x80 [ 1550.089318] ip_finish_output2+0x19c/0x570 [ 1550.093417] __ip_finish_output+0x1fe/0x2f0 [ 1550.097601] ? add_wait_queue+0x6f/0x80 [ 1550.101443] ip_finish_output+0x2e/0xd0 [ 1550.105282] ip_output+0x7e/0x110 [ 1550.108601] ? do_renameat2+0x244/0x5b0 [ 1550.112441] ? __pfx_ip_finish_output+0x10/0x10 [ 1550.116974] ip_local_out+0x62/0x70 [ 1550.120466] __ip_queue_xmit+0x192/0x450 [ 1550.124392] ip_queue_xmit+0x19/0x20 [ 1550.127971] __tcp_transmit_skb+0xa56/0xb90 [ 1550.132157] tcp_write_xmit+0x54d/0x12a0 [ 1550.136082] ? _copy_from_iter+0x12a/0x5b0 [ 1550.140184] __tcp_push_pending_frames+0x3b/0x110 [ 1550.144890] tcp_push+0x10c/0x120 [ 1550.148207] tcp_sendmsg_locked+0x491/0xc30 [ 1550.152397] tcp_sendmsg+0x31/0x50 [ 1550.155800] inet_sendmsg+0x47/0x80 [ 1550.159294] sock_sendmsg+0x66/0x70 [ 1550.162786] __sys_sendto+0x122/0x1b0 [ 1550.166454] ? debug_smp_processor_id+0x1b/0x30 [ 1550.170984] ? fpregs_assert_state_consistent+0x2b/0x60 [ 1550.176209] ? 
exit_to_user_mode_prepare+0x49/0x1b0 [ 1550.181090] __x64_sys_sendto+0x2d/0x40 [ 1550.184928] do_syscall_64+0x3f/0x90 [ 1550.188509] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 1550.193561] RIP: 0033:0x7fd4a6141530 [ 1550.197140] Code: ff eb bc 0f 1f 80 00 00 00 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 1d 45 31 c9 45 31 c0 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 68 c3 0f 1f 80 00 00 00 00 55 48 83 ec 20 48 [ 1550.215879] RSP: 002b:00007ffe74631ee8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 1550.223443] RAX: ffffffffffffffda RBX: 00007ffe74631f80 RCX: 00007fd4a6141530 [ 1550.230570] RDX: 0000000000000001 RSI: 00007fd4a5671ad0 RDI: 0000000000000003 [ 1550.237702] RBP: 000000000113cfe0 R08: 0000000000000000 R09: 0000000000000000 [ 1550.244835] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 [ 1550.251967] R13: 0000000000000000 R14: 00007ffe74631f80 R15: 0000000000624240 [ 1550.259102] </TASK> [ 1550.261293] Modules linked in: nfsv3 nfs_acl nfs lockd grace fscache netfs ipod(O) ipmi_ssif intel_rapl_msr intel_rapl_common amd64_edac edac_mce_amd kvm_amd binfmt_misc kvm crct10dif_pclmul ghash_clmulni_intel sha512_ssse3 aesni_intel crypto_simd cryptd mgag200 acpi_ipmi drm_shmem_helper rapl wmi_bmof drm_kms_helper i2c_algo_bit syscopyarea ipmi_si ipmi_devintf sysfillrect sysimgblt ccp k10temp ipmi_msghandler acpi_power_meter mac_hid sch_fq_codel drm sunrpc ip_tables x_tables autofs4 mlx5_ib ib_uverbs ib_core mlx5_core pci_hyperv_intf ahci mlxfw crc32_pclmul libahci psample i2c_piix4 tls wmi [ 1550.313799] ---[ end trace 0000000000000000 ]--- [ 1550.318423] RIP: 0010:__pskb_pull_tail+0x45c/0x4b0 [ 1550.323223] Code: 24 08 be 02 00 00 00 e8 42 cb ff ff 48 83 c4 10 31 c0 5b 41 5c 41 5d 41 5e 41 5f 5d e9 41 a0 26 00 48 8d 78 ff e9 42 fd ff ff <0f> 0b 48 c7 44 24 08 00 00 00 00 e9 9f fe ff ff 48 c7 44 24 08 00 [ 1550.341972] RSP: 0018:ffffad1fc05f8998 EFLAGS: 00010282 [ 1550.347205] RAX: 00000000fffffff2 RBX: 0000000000000001 RCX: 
00000000000002c0 [ 1550.354339] RDX: ffff992f00ffe15b RSI: ffff992f00ffe000 RDI: ffff992f168adf00 [ 1550.361478] RBP: ffffad1fc05f89d0 R08: 00000000ffffff1c R09: ffff992f168ad300 [ 1550.368611] R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000001 [ 1550.375746] R13: ffff992f168adf00 R14: ffff992f00fff048 R15: ffff992f168adf00 [ 1550.382886] FS: 00007fd4a5e55740(0000) GS:ffff994dfee80000(0000) knlGS:0000000000000000 [ 1550.390972] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1550.396719] CR2: 0000000000453531 CR3: 000000010a688000 CR4: 0000000000350ee0 [ 1550.403861] Kernel panic - not syncing: Fatal exception in interrupt [ 1550.410763] Kernel Offset: 0x0 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 1550.420940] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
[ 404.840245] kernel BUG at net/core/skbuff.c:2380! [ 404.845070] invalid opcode: 0000 [#1] PREEMPT SMP PTI [ 404.846838] CPU: 2 PID: 1166 Comm: nc Not tainted 6.1.0-rc7-g83de108e3723-dirty #73 [ 404.848923] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 [ 404.850796] RIP: 0010:__pskb_pull_tail+0x3fc/0x450 [ 404.852171] Code: 24 08 be 01 00 00 00 e8 e2 d0 ff ff 48 83 c4 10 31 c0 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 48 8d 78 ff e9 96 fd ff ff <0f> 0b 48 c7 44 24 08 00 00 00 00 e9 9f fe ff ff 48 c7 44 24 08 00 [ 404.856380] RSP: 0018:ffffa7aa800fca50 EFLAGS: 00010282 [ 404.857563] RAX: 00000000fffffff2 RBX: 0000000000000008 RCX: 0000000000000008 [ 404.859208] RDX: ffffa09f464b815d RSI: 00000000000002c0 RDI: 0000000000000000 [ 404.860790] RBP: ffffa7aa800fca88 R08: 00000000ffffff25 R09: 7fffffffffffffff [ 404.862379] R10: 0000000000000008 R11: ffffa09f44d9fc40 R12: 0000000000000008 [ 404.863963] R13: ffffa09f44d9fc00 R14: ffffa09f44d9fc00 R15: ffffa09f44d9f700 [ 404.865534] FS: 00007f98873c3740(0000) GS:ffffa0a0b7d00000(0000) knlGS:0000000000000000 [ 404.867367] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 404.868771] CR2: 00007ffe1f2db090 CR3: 00000001033c8002 CR4: 0000000000370ee0 [ 404.870448] Call Trace: [ 404.871063] <IRQ> [ 404.871663] skb_ensure_writable+0x84/0xa0 [ 404.872678] sk_skb_pull_data+0x17/0x20 [ 404.873650] bpf_prog_fddaddf43f6741a5_parser+0x24/0x1e9 [ 404.874942] ? selinux_netlbl_sock_rcv_skb+0x2e/0x1b0 [ 404.876160] ? kmem_cache_alloc+0x33/0x1b0 [ 404.877175] sk_psock_strp_parse+0x3f/0xc0 [ 404.878222] __strp_recv+0x1f2/0x620 [ 404.879143] ? 
strp_process+0x40/0x40 [ 404.880081] tcp_read_sock+0x7f/0x1b0 [ 404.881019] strp_read_sock+0x5e/0xa0 [ 404.882719] strp_data_ready+0x54/0x80 [ 404.883675] sk_psock_strp_data_ready+0x68/0x80 [ 404.884890] tcp_data_queue+0x43a/0xce0 [ 404.885950] tcp_rcv_established+0x248/0x6e0 [ 404.887026] tcp_v4_do_rcv+0x147/0x290 [ 404.887969] tcp_v4_rcv+0xe59/0xf00 [ 404.888865] ip_protocol_deliver_rcu+0x2d/0x1f0 [ 404.889985] ip_local_deliver_finish+0x6e/0x90 [ 404.891125] ip_local_deliver+0x66/0x110 [ 404.892116] ip_rcv+0x4a/0xf0 [ 404.893091] __netif_receive_skb_one_core+0x86/0xa0 [ 404.894300] process_backlog+0xa3/0x150 [ 404.895287] __napi_poll+0x24/0x160 [ 404.896249] net_rx_action+0x291/0x350 [ 404.897198] __do_softirq+0xb3/0x28c [ 404.898183] do_softirq+0x52/0x70 [ 404.899064] </IRQ> [ 404.899678] <TASK> [ 404.900310] __local_bh_enable_ip+0x5f/0x70 [ 404.901457] ip_finish_output2+0x179/0x500 [ 404.902572] ip_output+0x71/0x110 [ 404.903481] ? __ip_finish_output+0x2a0/0x2a0 [ 404.904643] __ip_queue_xmit+0x174/0x3d0 [ 404.905693] __tcp_transmit_skb+0xa38/0xb50 [ 404.906764] ? 
__alloc_skb+0x89/0x1b0 [ 404.907704] tcp_write_xmit+0x4dc/0x1160 [ 404.908689] __tcp_push_pending_frames+0x2d/0xc0 [ 404.909830] tcp_sendmsg_locked+0x291/0xbf0 [ 404.910872] tcp_sendmsg+0x23/0x40 [ 404.911739] sock_sendmsg+0x56/0x60 [ 404.913378] sock_write_iter+0x92/0xf0 [ 404.914352] vfs_write+0x356/0x3c0 [ 404.915263] ksys_write+0xa6/0xe0 [ 404.916298] do_syscall_64+0x38/0x90 [ 404.917245] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 404.918560] RIP: 0033:0x7f98874d4077 [ 404.919524] Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 [ 404.923807] RSP: 002b:00007ffe1f2db018 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 404.925817] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f98874d4077 [ 404.927706] RDX: 0000000000000008 RSI: 00007ffe1f2df080 RDI: 0000000000000003 [ 404.929369] RBP: 00007ffe1f2db058 R08: 0000000000000004 R09: 0000000000000001 [ 404.931039] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000008 [ 404.932717] R13: 00007ffe1f2df080 R14: 0000000000000003 R15: 0000000000000005 [ 404.934448] </TASK> [ 404.935141] Modules linked in: [ 404.936005] ---[ end trace 0000000000000000 ]--- [ 404.937206] RIP: 0010:__pskb_pull_tail+0x3fc/0x450 [ 404.938475] Code: 24 08 be 01 00 00 00 e8 e2 d0 ff ff 48 83 c4 10 31 c0 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 48 8d 78 ff e9 96 fd ff ff <0f> 0b 48 c7 44 24 08 00 00 00 00 e9 9f fe ff ff 48 c7 44 24 08 00 [ 404.943339] RSP: 0018:ffffa7aa800fca50 EFLAGS: 00010282 [ 404.944681] RAX: 00000000fffffff2 RBX: 0000000000000008 RCX: 0000000000000008 [ 404.946518] RDX: ffffa09f464b815d RSI: 00000000000002c0 RDI: 0000000000000000 [ 404.948302] RBP: ffffa7aa800fca88 R08: 00000000ffffff25 R09: 7fffffffffffffff [ 404.950269] R10: 0000000000000008 R11: ffffa09f44d9fc40 R12: 0000000000000008 [ 404.951691] R13: ffffa09f44d9fc00 R14: ffffa09f44d9fc00 R15: 
ffffa09f44d9f700 [ 404.953292] FS: 00007f98873c3740(0000) GS:ffffa0a0b7d00000(0000) knlGS:0000000000000000 [ 404.955084] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 404.956425] CR2: 00007ffe1f2db090 CR3: 00000001033c8002 CR4: 0000000000370ee0 [ 404.958010] Kernel panic - not syncing: Fatal exception in interrupt [ 404.959636] Kernel Offset: 0x29200000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 404.961972] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
"""Test client that sends a request to the BPF test server in separate chunks.

Each chunk is written with its own send call and followed by a receive with a
one-second timeout, so the chunks are spaced apart in time. The last chunk of
every request ends with the "END" marker.
"""
import argparse
import socket
import time

# Maximum number of bytes read from the socket per receive call.
RECV_BUFFER_SIZE = 2048
# Seconds to wait for a response after each chunk before moving on.
RECV_TIMEOUT_SECONDS = 1

# Request sent by default: two chunks.
NORMAL_REQUEST = ['hello world 1', 'hello world 2 END']
# Request sent with -k: five chunks.
CRASH_REQUEST = [
    'hello world 1',
    'hello world 2',
    'hello world 3',
    'hello world 4',
    'hello world 5 END',
]


def _send(sock, data):
    """Log `data` (a str) and write all of it to `sock`.

    `sendall` is used instead of `send` so that a partial write cannot
    silently drop the rest of the chunk.
    """
    print(f'[Sending "{data}"]')
    sock.sendall(data.encode())


def _recv(sock):
    """Read one response from `sock`.

    Returns the decoded text, or None when nothing could be read (timeout or
    any other socket error). Undecodable bytes are replaced rather than
    raising, so a binary response does not abort the run.
    """
    try:
        resp = sock.recv(RECV_BUFFER_SIZE)
    except OSError:
        # socket.timeout is a subclass of OSError; unlike a bare `except`,
        # this lets KeyboardInterrupt and SystemExit propagate.
        print('[No data received]')
        return None
    return resp.decode(errors='replace')


def parse_args(argv=None):
    """Parse command-line options.

    argv: list of argument strings; defaults to sys.argv[1:] when None.
    Returns the argparse namespace with `k`, `ip` and `port`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-k', help='Kernel crash scenario',
                        action='store_true')
    parser.add_argument('--ip', default='localhost')
    parser.add_argument('--port', default=8080, type=int)
    return parser.parse_args(argv)


def main():
    """Connect to the server and send the selected request chunk by chunk."""
    args = parse_args()
    # What request to send?
    req = CRASH_REQUEST if args.k else NORMAL_REQUEST

    # The context manager closes the socket even if connect/send raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect((args.ip, args.port))
        s.settimeout(RECV_TIMEOUT_SECONDS)
        for r in req:
            _send(s, r)
            resp = _recv(s)
            if resp:
                print(resp)


if __name__ == '__main__':
    main()
#include <sys/types.h> #include <sys/socket.h> #include <linux/tcp.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> /* Put state of each socket in this struct (This will be used in sockops.h as * part of per socket metadata) */ struct connection_state { }; #include "my_bpf/sockops.h" #define OFFSET_MASK 0x0fff /* struct arg { */ /* int test; */ /* }; */ /* struct { */ /* __uint(type, BPF_MAP_TYPE_ARRAY); */ /* /1* __uint(map_flags, BPF_F_MMAPABLE); *1/ */ /* __type(key, __u32); */ /* __type(value, struct arg); */ /* __uint(max_entries, 1); */ /* } arg_map SEC(".maps"); */ SEC("sk_skb/stream_parser") int parser(struct __sk_buff *skb) { void *data; void *data_end; __u16 len; /* Pull message data so that we can access it */ if (bpf_skb_pull_data(skb, skb->len) != 0) { bpf_printk("Parser: Failed to load message data\n"); return 0; } data = (void *)(long)skb->data; data_end = (void *)(long)skb->data_end; len = skb->len; char *ptr = data + ((len - 3) & 0x7fff); if ((void *)ptr < data || ((void *)ptr + 3 > data_end)) { bpf_printk("Parser: Not enough data!"); return 0; } if (ptr[0] == 'E' && ptr[1] == 'N' && ptr[2] == 'D') { /* Found the end of request */ return skb->len; } bpf_printk("@%d\n%s", (long)ptr - (long)data, ptr); return 0; } SEC("sk_skb/stream_verdict") int verdict(struct __sk_buff *skb) { return SK_PASS; } char _license[] SEC("license") = "GPL";