On Fri, 29 May 2020 23:06:41 +0000 John Fastabend <john.fastabend@xxxxxxxxx> wrote: > We will need this block of code called from tls context shortly > lets refactor the redirect logic so its easy to use. This also > cleans up the switch stmt so we have fewer fallthrough cases. > > No logic changes are intended. > > Fixes: d829e9c4112b5 ("tls: convert to generic sk_msg interface") > Signed-off-by: John Fastabend <john.fastabend@xxxxxxxxx> > --- > net/core/skmsg.c | 55 +++++++++++++++++++++++++++++++++--------------------- > 1 file changed, 34 insertions(+), 21 deletions(-) > > diff --git a/net/core/skmsg.c b/net/core/skmsg.c > index c479372..9d72f71 100644 > --- a/net/core/skmsg.c > +++ b/net/core/skmsg.c > @@ -682,13 +682,43 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp) > return container_of(parser, struct sk_psock, parser); > } > > -static void sk_psock_verdict_apply(struct sk_psock *psock, > - struct sk_buff *skb, int verdict) > +static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb) > { > struct sk_psock *psock_other; > struct sock *sk_other; > bool ingress; > > + sk_other = tcp_skb_bpf_redirect_fetch(skb); > + if (unlikely(!sk_other)) { > + kfree_skb(skb); > + return; > + } > + psock_other = sk_psock(sk_other); I think we're not in RCU read-side critical section and so lockdep-RCU [0] is complaining about accessing sk_user_data with rcu_dereference: bash-5.0# ./test_sockmap # 1/ 6 sockmap::txmsg test passthrough:OK # 2/ 6 sockmap::txmsg test redirect:OK # 3/ 6 sockmap::txmsg test drop:OK # 4/ 6 sockmap::txmsg test ingress redirect:OK [ 96.791996] [ 96.792211] ============================= [ 96.792763] WARNING: suspicious RCU usage [ 96.793297] 5.7.0-rc7-02964-g615b5749876a-dirty #692 Not tainted [ 96.794032] ----------------------------- [ 96.794480] include/linux/skmsg.h:284 suspicious rcu_dereference_check() usage! [ 96.795154] [ 96.795154] other info that might help us debug this: [ 96.795154] [ 96.795926] [ 96.795926] rcu_scheduler_active = 2, debug_locks = 1 [ 96.796556] 1 lock held by test_sockmap/1060: [ 96.796970] #0: ffff8880a0d35f20 (sk_lock-AF_INET){+.+.}-{0:0}, at: tls_sw_recvmsg+0x238/0xc10 [ 96.797813] [ 96.797813] stack backtrace: [ 96.798224] CPU: 1 PID: 1060 Comm: test_sockmap Not tainted 5.7.0-rc7-02964-g615b5749876a-dirty #692 [ 96.799071] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014 [ 96.800294] Call Trace: [ 96.800543] dump_stack+0x97/0xe0 [ 96.800864] sk_psock_skb_redirect.isra.0+0xa6/0x1b0 [ 96.801338] sk_psock_tls_strp_read+0x298/0x310 [ 96.801769] tls_sw_recvmsg+0xa47/0xc10 [ 96.802144] ? decrypt_skb+0x80/0x80 [ 96.802491] ? lock_downgrade+0x330/0x330 [ 96.802887] inet_recvmsg+0xae/0x2a0 [ 96.803224] ? rw_copy_check_uvector+0x15e/0x1b0 [ 96.803669] ? inet_sendpage+0xc0/0xc0 [ 96.804029] ____sys_recvmsg+0x110/0x210 [ 96.804417] ? kernel_recvmsg+0x60/0x60 [ 96.804785] ? copy_msghdr_from_user+0x91/0xd0 [ 96.805200] ? __copy_msghdr_from_user+0x230/0x230 [ 96.805665] ? lock_acquire+0x120/0x4b0 [ 96.806025] ? match_held_lock+0x1b/0x230 [ 96.806417] ___sys_recvmsg+0xb8/0x100 [ 96.806778] ? ___sys_sendmsg+0x110/0x110 [ 96.807155] ? lock_downgrade+0x330/0x330 [ 96.807555] ? __fget_light+0xad/0x110 [ 96.807917] ? sockfd_lookup_light+0x91/0xb0 [ 96.808334] __sys_recvmsg+0x87/0xe0 [ 96.808674] ? __sys_recvmsg_sock+0x70/0x70 [ 96.809064] ? rcu_read_lock_sched_held+0x81/0xb0 [ 96.809540] ? switch_fpu_return+0x1/0x250 [ 96.809948] ? do_syscall_64+0x5f/0x9a0 [ 96.810325] do_syscall_64+0xad/0x9a0 [ 96.810676] ? handle_mm_fault+0x21e/0x3d0 [ 96.811060] ? syscall_return_slowpath+0x530/0x530 [ 96.811524] ? trace_hardirqs_on_thunk+0x1a/0x1c [ 96.811955] ? lockdep_hardirqs_off+0xb5/0x100 [ 96.812383] ? trace_hardirqs_off_thunk+0x1a/0x1c [ 96.812871] entry_SYSCALL_64_after_hwframe+0x49/0xb3 [ 96.813425] RIP: 0033:0x7f92ff15c737 [ 96.813762] Code: 02 b8 ff ff ff ff eb bd 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2f 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [ 96.815477] RSP: 002b:00007ffd80734b68 EFLAGS: 00000246 ORIG_RAX: 000000000000002f [ 96.816177] RAX: ffffffffffffffda RBX: 000000000000001c RCX: 00007f92ff15c737 [ 96.816847] RDX: 0000000000004000 RSI: 00007ffd80734bd0 RDI: 000000000000001c [ 96.817602] RBP: 00007ffd80734c50 R08: 00007ffd80734bc0 R09: 0000000000000060 [ 96.818351] R10: fffffffffffffb15 R11: 0000000000000246 R12: 0000000000000000 [ 96.819107] R13: 0000000000004000 R14: 00007f92fef7a6b8 R15: 00007ffd80734d50 [0] https://www.kernel.org/doc/Documentation/RCU/lockdep-splat.txt > + if (!psock_other || sock_flag(sk_other, SOCK_DEAD) || > + !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) { > + kfree_skb(skb); > + return; > + } > + > + ingress = tcp_skb_bpf_ingress(skb); > + if ((!ingress && sock_writeable(sk_other)) || > + (ingress && > + atomic_read(&sk_other->sk_rmem_alloc) <= > + sk_other->sk_rcvbuf)) { > + if (!ingress) > + skb_set_owner_w(skb, sk_other); > + skb_queue_tail(&psock_other->ingress_skb, skb); > + schedule_work(&psock_other->work); > + } else { > + kfree_skb(skb); > + } > +} > + > +static void sk_psock_verdict_apply(struct sk_psock *psock, > + struct sk_buff *skb, int verdict) > +{ > + struct sock *sk_other; > + > switch (verdict) { > case __SK_PASS: > sk_other = psock->sk; > @@ -707,25 +737,8 @@ static void sk_psock_verdict_apply(struct sk_psock *psock, > } > goto out_free; > case __SK_REDIRECT: > - sk_other = tcp_skb_bpf_redirect_fetch(skb); > - if (unlikely(!sk_other)) > - goto out_free; > - psock_other = sk_psock(sk_other); > - if (!psock_other || sock_flag(sk_other, SOCK_DEAD) || > - !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) > - goto out_free; > - ingress = tcp_skb_bpf_ingress(skb); > - if ((!ingress && sock_writeable(sk_other)) || > - (ingress && > - atomic_read(&sk_other->sk_rmem_alloc) <= > - sk_other->sk_rcvbuf)) { > - if (!ingress) > - skb_set_owner_w(skb, sk_other); > - skb_queue_tail(&psock_other->ingress_skb, skb); > - schedule_work(&psock_other->work); > - break; > - } > - /* fall-through */ > + sk_psock_skb_redirect(psock, skb); > + break; > case __SK_DROP: > /* fall-through */ > default: >