On 03/24, Farbod Shahinfar wrote:
Hello everyone,
I am performing some tests with BPF SK_SKB and I have encountered a
scenario resulting in a kernel panic. I use a BPF_SK_SKB_STREAM_PARSER
program to parse a request which might be spanning multiple TCP
segments. If the parser program detects the end of the request, it
returns skb->len, passing the request to the
BPF_SK_SKB_STREAM_VERDICT program; otherwise it returns 0, waiting
for more data to be received. You can find the BPF program attached
(bpf_test.c). Is there an assumption that the program violates?
To reproduce the crashing scenario, I use the attached Python script
(client.py), which sends data in chunks to the BPF program. Usually,
the kernel crashes on the 3rd segment.
To provide more information, I have attached some crash logs. I have
tested this on kernel version 6.1.0 (slightly modified) and version
6.2.8 (unmodified, obtained from kernel.org). It seems that the panic
happens when bpf_skb_pull_data is invoked.
Is this a known issue or is there any information that I can provide to
help resolve it?
John, is it something already covered by your recent [0]?
0:
https://lore.kernel.org/bpf/20230321215212.525630-1-john.fastabend@xxxxxxxxx/
Sincerely,
Farbod Shahinfar
PhD student at Politecnico di Milano
https://fshahinfar1.github.io/
kernel BUG at net/core/skbuff.c:2446!
[ 1549.847914] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
[ 1549.853139] CPU: 10 PID: 2008 Comm: python3 Tainted: G
O 6.2.8 #4
[ 1549.860615] Hardware name: Dell Inc. PowerEdge C6525/04DK47, BIOS
2.0.3 01/15/2021
[ 1549.868181] RIP: 0010:__pskb_pull_tail+0x45c/0x4b0
[ 1549.872973] Code: 24 08 be 02 00 00 00 e8 42 cb ff ff 48 83 c4 10 31
c0 5b 41 5c 41 5d 41 5e 41 5f 5d e9 41 a0 26 00 48 8d 78 ff e9 42 fd ff
ff <0f> 0b 48 c7 44 24 08 00 00 00 00 e9 9f fe ff ff 48 c7 44 24 08 00
[ 1549.891711] RSP: 0018:ffffad1fc05f8998 EFLAGS: 00010282
[ 1549.896939] RAX: 00000000fffffff2 RBX: 0000000000000001 RCX:
00000000000002c0
[ 1549.904070] RDX: ffff992f00ffe15b RSI: ffff992f00ffe000 RDI:
ffff992f168adf00
[ 1549.911196] RBP: ffffad1fc05f89d0 R08: 00000000ffffff1c R09:
ffff992f168ad300
[ 1549.918329] R10: 0000000000000001 R11: 0000000000000001 R12:
0000000000000001
[ 1549.925460] R13: ffff992f168adf00 R14: ffff992f00fff048 R15:
ffff992f168adf00
[ 1549.932585] FS: 00007fd4a5e55740(0000) GS:ffff994dfee80000(0000)
knlGS:0000000000000000
[ 1549.940670] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1549.946409] CR2: 0000000000453531 CR3: 000000010a688000 CR4:
0000000000350ee0
[ 1549.953542] Call Trace:
[ 1549.955997] <IRQ>
[ 1549.958017] skb_ensure_writable+0x8f/0xa0
[ 1549.962114] sk_skb_pull_data+0x24/0x30
[ 1549.965953] bpf_prog_5c4f327a1e142419_parser+0x1f/0xd8
[ 1549.971179] sk_psock_strp_parse+0x50/0xc0
[ 1549.975279] __strp_recv+0x1fb/0x630
[ 1549.978859] strp_recv+0x27/0x30
[ 1549.982092] tcp_read_sock+0x91/0x1c0
[ 1549.985757] ? __pfx_strp_recv+0x10/0x10
[ 1549.989683] strp_read_sock+0x68/0xb0
[ 1549.993348] strp_data_ready+0x61/0xa0
[ 1549.997101] sk_psock_strp_data_ready+0x86/0xa0
[ 1550.001635] tcp_data_ready+0x33/0xe0
[ 1550.005301] tcp_data_queue+0x469/0xd40
[ 1550.009142] tcp_rcv_established+0x264/0x730
[ 1550.013411] ? sk_filter_trim_cap+0x114/0x250
[ 1550.017772] tcp_v4_do_rcv+0x15d/0x2b0
[ 1550.021524] tcp_v4_rcv+0x11f1/0x1260
[ 1550.025192] ip_protocol_deliver_rcu+0x3b/0x330
[ 1550.029724] ip_local_deliver_finish+0x8a/0xb0
[ 1550.034171] ip_local_deliver+0x73/0x120
[ 1550.038095] ? __pfx_ip_local_deliver_finish+0x10/0x10
[ 1550.043234] ip_rcv_finish+0xc2/0xd0
[ 1550.046816] ip_rcv+0x57/0xf0
[ 1550.049787] ? probe_sched_wakeup+0x39/0x40
[ 1550.053975] __netif_receive_skb_one_core+0x8c/0xa0
[ 1550.058854] __netif_receive_skb+0x15/0x60
[ 1550.062953] process_backlog+0xa8/0x140
[ 1550.066793] __napi_poll+0x31/0x1d0
[ 1550.070286] net_rx_action+0x290/0x2e0
[ 1550.074037] __do_softirq+0xf5/0x2d7
[ 1550.077618] do_softirq+0x9a/0xc0
[ 1550.080935] </IRQ>
[ 1550.083032] <TASK>
[ 1550.085129] __local_bh_enable_ip+0x7d/0x80
[ 1550.089318] ip_finish_output2+0x19c/0x570
[ 1550.093417] __ip_finish_output+0x1fe/0x2f0
[ 1550.097601] ? add_wait_queue+0x6f/0x80
[ 1550.101443] ip_finish_output+0x2e/0xd0
[ 1550.105282] ip_output+0x7e/0x110
[ 1550.108601] ? do_renameat2+0x244/0x5b0
[ 1550.112441] ? __pfx_ip_finish_output+0x10/0x10
[ 1550.116974] ip_local_out+0x62/0x70
[ 1550.120466] __ip_queue_xmit+0x192/0x450
[ 1550.124392] ip_queue_xmit+0x19/0x20
[ 1550.127971] __tcp_transmit_skb+0xa56/0xb90
[ 1550.132157] tcp_write_xmit+0x54d/0x12a0
[ 1550.136082] ? _copy_from_iter+0x12a/0x5b0
[ 1550.140184] __tcp_push_pending_frames+0x3b/0x110
[ 1550.144890] tcp_push+0x10c/0x120
[ 1550.148207] tcp_sendmsg_locked+0x491/0xc30
[ 1550.152397] tcp_sendmsg+0x31/0x50
[ 1550.155800] inet_sendmsg+0x47/0x80
[ 1550.159294] sock_sendmsg+0x66/0x70
[ 1550.162786] __sys_sendto+0x122/0x1b0
[ 1550.166454] ? debug_smp_processor_id+0x1b/0x30
[ 1550.170984] ? fpregs_assert_state_consistent+0x2b/0x60
[ 1550.176209] ? exit_to_user_mode_prepare+0x49/0x1b0
[ 1550.181090] __x64_sys_sendto+0x2d/0x40
[ 1550.184928] do_syscall_64+0x3f/0x90
[ 1550.188509] entry_SYSCALL_64_after_hwframe+0x72/0xdc
[ 1550.193561] RIP: 0033:0x7fd4a6141530
[ 1550.197140] Code: ff eb bc 0f 1f 80 00 00 00 00 f3 0f 1e fa 41 89 ca
64 8b 04 25 18 00 00 00 85 c0 75 1d 45 31 c9 45 31 c0 b8 2c 00 00 00 0f
05 <48> 3d 00 f0 ff ff 77 68 c3 0f 1f 80 00 00 00 00 55 48 83 ec 20 48
[ 1550.215879] RSP: 002b:00007ffe74631ee8 EFLAGS: 00000246 ORIG_RAX:
000000000000002c
[ 1550.223443] RAX: ffffffffffffffda RBX: 00007ffe74631f80 RCX:
00007fd4a6141530
[ 1550.230570] RDX: 0000000000000001 RSI: 00007fd4a5671ad0 RDI:
0000000000000003
[ 1550.237702] RBP: 000000000113cfe0 R08: 0000000000000000 R09:
0000000000000000
[ 1550.244835] R10: 0000000000000000 R11: 0000000000000246 R12:
0000000000000001
[ 1550.251967] R13: 0000000000000000 R14: 00007ffe74631f80 R15:
0000000000624240
[ 1550.259102] </TASK>
[ 1550.261293] Modules linked in: nfsv3 nfs_acl nfs lockd grace fscache
netfs ipod(O) ipmi_ssif intel_rapl_msr intel_rapl_common amd64_edac
edac_mce_amd kvm_amd binfmt_misc kvm crct10dif_pclmul ghash_clmulni_intel
sha512_ssse3 aesni_intel crypto_simd cryptd mgag200 acpi_ipmi
drm_shmem_helper rapl wmi_bmof drm_kms_helper i2c_algo_bit syscopyarea
ipmi_si ipmi_devintf sysfillrect sysimgblt ccp k10temp ipmi_msghandler
acpi_power_meter mac_hid sch_fq_codel drm sunrpc ip_tables x_tables
autofs4 mlx5_ib ib_uverbs ib_core mlx5_core pci_hyperv_intf ahci mlxfw
crc32_pclmul libahci psample i2c_piix4 tls wmi
[ 1550.313799] ---[ end trace 0000000000000000 ]---
[ 1550.318423] RIP: 0010:__pskb_pull_tail+0x45c/0x4b0
[ 1550.323223] Code: 24 08 be 02 00 00 00 e8 42 cb ff ff 48 83 c4 10 31
c0 5b 41 5c 41 5d 41 5e 41 5f 5d e9 41 a0 26 00 48 8d 78 ff e9 42 fd ff
ff <0f> 0b 48 c7 44 24 08 00 00 00 00 e9 9f fe ff ff 48 c7 44 24 08 00
[ 1550.341972] RSP: 0018:ffffad1fc05f8998 EFLAGS: 00010282
[ 1550.347205] RAX: 00000000fffffff2 RBX: 0000000000000001 RCX:
00000000000002c0
[ 1550.354339] RDX: ffff992f00ffe15b RSI: ffff992f00ffe000 RDI:
ffff992f168adf00
[ 1550.361478] RBP: ffffad1fc05f89d0 R08: 00000000ffffff1c R09:
ffff992f168ad300
[ 1550.368611] R10: 0000000000000001 R11: 0000000000000001 R12:
0000000000000001
[ 1550.375746] R13: ffff992f168adf00 R14: ffff992f00fff048 R15:
ffff992f168adf00
[ 1550.382886] FS: 00007fd4a5e55740(0000) GS:ffff994dfee80000(0000)
knlGS:0000000000000000
[ 1550.390972] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1550.396719] CR2: 0000000000453531 CR3: 000000010a688000 CR4:
0000000000350ee0
[ 1550.403861] Kernel panic - not syncing: Fatal exception in interrupt
[ 1550.410763] Kernel Offset: 0x0 from 0xffffffff81000000 (relocation
range: 0xffffffff80000000-0xffffffffbfffffff)
[ 1550.420940] ---[ end Kernel panic - not syncing: Fatal exception in
interrupt ]---
[ 404.840245] kernel BUG at net/core/skbuff.c:2380!
[ 404.845070] invalid opcode: 0000 [#1] PREEMPT SMP PTI
[ 404.846838] CPU: 2 PID: 1166 Comm: nc Not tainted
6.1.0-rc7-g83de108e3723-dirty #73
[ 404.848923] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
1.15.0-1 04/01/2014
[ 404.850796] RIP: 0010:__pskb_pull_tail+0x3fc/0x450
[ 404.852171] Code: 24 08 be 01 00 00 00 e8 e2 d0 ff ff 48 83 c4 10 31
c0 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 48 8d 78 ff e9 96 fd ff
ff <0f> 0b 48 c7 44 24 08 00 00 00 00 e9 9f fe ff ff 48 c7 44 24 08 00
[ 404.856380] RSP: 0018:ffffa7aa800fca50 EFLAGS: 00010282
[ 404.857563] RAX: 00000000fffffff2 RBX: 0000000000000008 RCX:
0000000000000008
[ 404.859208] RDX: ffffa09f464b815d RSI: 00000000000002c0 RDI:
0000000000000000
[ 404.860790] RBP: ffffa7aa800fca88 R08: 00000000ffffff25 R09:
7fffffffffffffff
[ 404.862379] R10: 0000000000000008 R11: ffffa09f44d9fc40 R12:
0000000000000008
[ 404.863963] R13: ffffa09f44d9fc00 R14: ffffa09f44d9fc00 R15:
ffffa09f44d9f700
[ 404.865534] FS: 00007f98873c3740(0000) GS:ffffa0a0b7d00000(0000)
knlGS:0000000000000000
[ 404.867367] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 404.868771] CR2: 00007ffe1f2db090 CR3: 00000001033c8002 CR4:
0000000000370ee0
[ 404.870448] Call Trace:
[ 404.871063] <IRQ>
[ 404.871663] skb_ensure_writable+0x84/0xa0
[ 404.872678] sk_skb_pull_data+0x17/0x20
[ 404.873650] bpf_prog_fddaddf43f6741a5_parser+0x24/0x1e9
[ 404.874942] ? selinux_netlbl_sock_rcv_skb+0x2e/0x1b0
[ 404.876160] ? kmem_cache_alloc+0x33/0x1b0
[ 404.877175] sk_psock_strp_parse+0x3f/0xc0
[ 404.878222] __strp_recv+0x1f2/0x620
[ 404.879143] ? strp_process+0x40/0x40
[ 404.880081] tcp_read_sock+0x7f/0x1b0
[ 404.881019] strp_read_sock+0x5e/0xa0
[ 404.882719] strp_data_ready+0x54/0x80
[ 404.883675] sk_psock_strp_data_ready+0x68/0x80
[ 404.884890] tcp_data_queue+0x43a/0xce0
[ 404.885950] tcp_rcv_established+0x248/0x6e0
[ 404.887026] tcp_v4_do_rcv+0x147/0x290
[ 404.887969] tcp_v4_rcv+0xe59/0xf00
[ 404.888865] ip_protocol_deliver_rcu+0x2d/0x1f0
[ 404.889985] ip_local_deliver_finish+0x6e/0x90
[ 404.891125] ip_local_deliver+0x66/0x110
[ 404.892116] ip_rcv+0x4a/0xf0
[ 404.893091] __netif_receive_skb_one_core+0x86/0xa0
[ 404.894300] process_backlog+0xa3/0x150
[ 404.895287] __napi_poll+0x24/0x160
[ 404.896249] net_rx_action+0x291/0x350
[ 404.897198] __do_softirq+0xb3/0x28c
[ 404.898183] do_softirq+0x52/0x70
[ 404.899064] </IRQ>
[ 404.899678] <TASK>
[ 404.900310] __local_bh_enable_ip+0x5f/0x70
[ 404.901457] ip_finish_output2+0x179/0x500
[ 404.902572] ip_output+0x71/0x110
[ 404.903481] ? __ip_finish_output+0x2a0/0x2a0
[ 404.904643] __ip_queue_xmit+0x174/0x3d0
[ 404.905693] __tcp_transmit_skb+0xa38/0xb50
[ 404.906764] ? __alloc_skb+0x89/0x1b0
[ 404.907704] tcp_write_xmit+0x4dc/0x1160
[ 404.908689] __tcp_push_pending_frames+0x2d/0xc0
[ 404.909830] tcp_sendmsg_locked+0x291/0xbf0
[ 404.910872] tcp_sendmsg+0x23/0x40
[ 404.911739] sock_sendmsg+0x56/0x60
[ 404.913378] sock_write_iter+0x92/0xf0
[ 404.914352] vfs_write+0x356/0x3c0
[ 404.915263] ksys_write+0xa6/0xe0
[ 404.916298] do_syscall_64+0x38/0x90
[ 404.917245] entry_SYSCALL_64_after_hwframe+0x63/0xcd
[ 404.918560] RIP: 0033:0x7f98874d4077
[ 404.919524] Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00
00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f
05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
[ 404.923807] RSP: 002b:00007ffe1f2db018 EFLAGS: 00000246 ORIG_RAX:
0000000000000001
[ 404.925817] RAX: ffffffffffffffda RBX: 0000000000000000 RCX:
00007f98874d4077
[ 404.927706] RDX: 0000000000000008 RSI: 00007ffe1f2df080 RDI:
0000000000000003
[ 404.929369] RBP: 00007ffe1f2db058 R08: 0000000000000004 R09:
0000000000000001
[ 404.931039] R10: 0000000000000000 R11: 0000000000000246 R12:
0000000000000008
[ 404.932717] R13: 00007ffe1f2df080 R14: 0000000000000003 R15:
0000000000000005
[ 404.934448] </TASK>
[ 404.935141] Modules linked in:
[ 404.936005] ---[ end trace 0000000000000000 ]---
[ 404.937206] RIP: 0010:__pskb_pull_tail+0x3fc/0x450
[ 404.938475] Code: 24 08 be 01 00 00 00 e8 e2 d0 ff ff 48 83 c4 10 31
c0 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 48 8d 78 ff e9 96 fd ff
ff <0f> 0b 48 c7 44 24 08 00 00 00 00 e9 9f fe ff ff 48 c7 44 24 08 00
[ 404.943339] RSP: 0018:ffffa7aa800fca50 EFLAGS: 00010282
[ 404.944681] RAX: 00000000fffffff2 RBX: 0000000000000008 RCX:
0000000000000008
[ 404.946518] RDX: ffffa09f464b815d RSI: 00000000000002c0 RDI:
0000000000000000
[ 404.948302] RBP: ffffa7aa800fca88 R08: 00000000ffffff25 R09:
7fffffffffffffff
[ 404.950269] R10: 0000000000000008 R11: ffffa09f44d9fc40 R12:
0000000000000008
[ 404.951691] R13: ffffa09f44d9fc00 R14: ffffa09f44d9fc00 R15:
ffffa09f44d9f700
[ 404.953292] FS: 00007f98873c3740(0000) GS:ffffa0a0b7d00000(0000)
knlGS:0000000000000000
[ 404.955084] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 404.956425] CR2: 00007ffe1f2db090 CR3: 00000001033c8002 CR4:
0000000000370ee0
[ 404.958010] Kernel panic - not syncing: Fatal exception in interrupt
[ 404.959636] Kernel Offset: 0x29200000 from 0xffffffff81000000
(relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[ 404.961972] ---[ end Kernel panic - not syncing: Fatal exception in
interrupt ]---
import socket
import time
import argparse
def _send(sock, data):
print(f'[Sending "{data}"]')
sock.send(data.encode())
def _recv(sock):
try:
resp = sock.recv(2048)
return resp.decode()
except:
print('[No data received]')
return None
def parse_args():
    """Parse the command line and return the resulting argparse namespace.

    Options:
      -k      flag selecting the kernel crash scenario (default: off)
      --ip    address to connect to (default: 'localhost')
      --port  TCP port to connect to, as an int (default: 8080)
    """
    cli = argparse.ArgumentParser()
    options = (
        (('-k',), {'help': 'Kernel crash scenario', 'action': 'store_true'}),
        (('--ip',), {'default': 'localhost'}),
        (('--port',), {'default': 8080, 'type': int}),
    )
    for flags, settings in options:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
def main():
    """Connect to the server and send the request in separate chunks.

    Each chunk is sent with _send(); after every chunk the client waits (up
    to the 1-second socket timeout) for a reply via _recv() and prints it if
    one arrived. Only the final chunk ends with 'END'. With -k the request
    is split into five chunks instead of two, which is the scenario reported
    to crash the kernel.
    """
    args = parse_args()

    # What request to send?
    if args.k:
        req = ['hello world 1', 'hello world 2', 'hello world 3',
               'hello world 4', 'hello world 5 END']
    else:
        req = ['hello world 1', 'hello world 2 END']

    # The context manager closes the socket on every exit path, including
    # when connect() or one of the exchanges raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect((args.ip, args.port))
        s.settimeout(1)
        for r in req:
            _send(s, r)
            resp = _recv(s)
            if resp:
                print(resp)
# Run the client only when this file is executed as a script.
if __name__ == '__main__':
    main()
#include <sys/types.h>
#include <sys/socket.h>
#include <linux/tcp.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
/*
 * Put the state of each socket in this struct. (It will be used in
 * sockops.h as part of the per-socket metadata.) It has no members in
 * this test program.
 */
struct connection_state { };
#include "my_bpf/sockops.h"
/*
 * NOTE(review): OFFSET_MASK is not referenced in the code visible in this
 * file (the parser masks its offset with a literal 0x7fff) -- confirm
 * whether anything else depends on it.
 */
#define OFFSET_MASK 0x0fff
/* struct arg { */
/* int test; */
/* }; */
/* struct { */
/* __uint(type, BPF_MAP_TYPE_ARRAY); */
/* /1* __uint(map_flags, BPF_F_MMAPABLE); *1/ */
/* __type(key, __u32); */
/* __type(value, struct arg); */
/* __uint(max_entries, 1); */
/* } arg_map SEC(".maps"); */
/*
 * Stream parser: reports whether the data seen so far ends a request.
 *
 * A request is considered complete when its last three bytes are "END".
 * Returns skb->len when that marker is found, and 0 in every other case
 * (pull failure, too little data, or marker not present yet).
 */
SEC("sk_skb/stream_parser")
int parser(struct __sk_buff *skb)
{
	/* Pull the whole message so that its bytes can be accessed below. */
	if (bpf_skb_pull_data(skb, skb->len)) {
		bpf_printk("Parser: Failed to load message data\n");
		return 0;
	}

	/* Re-read the data pointers only after the pull above. */
	void *start = (void *)(long)skb->data;
	void *end = (void *)(long)skb->data_end;
	__u16 msg_len = skb->len;

	/*
	 * Point at the last three bytes of the message. The mask keeps the
	 * offset in the range 0..0x7fff -- presumably so that the BPF verifier
	 * can bound the pointer arithmetic; confirm.
	 * NOTE(review): skb->len is narrowed to 16 bits and the offset to 15
	 * bits, so for a message longer than 0x7fff bytes the offset would no
	 * longer be len - 3 -- confirm messages stay below that size.
	 */
	char *tail = start + ((msg_len - 3) & 0x7fff);

	/* Also rejects messages shorter than three bytes (offset wraps high). */
	if ((void *)tail < start || (void *)tail + 3 > end) {
		bpf_printk("Parser: Not enough data!");
		return 0;
	}

	if (tail[0] != 'E' || tail[1] != 'N' || tail[2] != 'D') {
		/*
		 * No end marker yet: trace the offset and the trailing bytes.
		 * NOTE(review): %s is handed a pointer into packet data that this
		 * code does not NUL-terminate -- confirm the output is as intended.
		 */
		bpf_printk("@%d\n%s", (long)tail - (long)start, tail);
		return 0;
	}

	/* Found the end of request */
	return skb->len;
}
/*
 * Stream verdict: accepts every message it is given by returning SK_PASS
 * unconditionally.
 */
SEC("sk_skb/stream_verdict")
int verdict(struct __sk_buff *skb)
{
	(void)skb; /* the message contents are not inspected */

	return SK_PASS;
}
/* License string for this BPF object, placed in the "license" section. */
char _license[] SEC("license") = "GPL";