From: Xu Kuohai <xukuohai@xxxxxxxxxx> sk_psock_backlog triggers a NULL dereference: BUG: kernel NULL pointer dereference, address: 000000000000000e #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 0 PID: 70 Comm: kworker/0:3 Not tainted 6.5.0-rc2-00585-gb11bbbe4c66e #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-p4 Workqueue: events sk_psock_backlog RIP: 0010:0xffffffffc0205254 Code: 00 00 48 89 94 24 a0 00 00 00 41 5f 41 5e 41 5d 41 5c 5d 5b 41 5b 41 5a 41 59 41 50 RSP: 0018:ffffc90000acbcb8 EFLAGS: 00010246 RAX: ffffffff81c5ee10 RBX: ffff888018260000 RCX: 0000000000000001 RDX: 0000000000000003 RSI: ffffc90000acbd58 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000003 R09: 0000000080100005 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000003 R13: 0000000000000000 R14: 0000000000000021 R15: 0000000000000003 FS: 0000000000000000(0000) GS:ffff88803ea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000000e CR3: 000000000b0de002 CR4: 0000000000170ef0 Call Trace: <TASK> ? __die+0x24/0x70 ? page_fault_oops+0x15d/0x480 ? fixup_exception+0x26/0x330 ? exc_page_fault+0x72/0x1d0 ? asm_exc_page_fault+0x26/0x30 ? __pfx_inet_sendmsg+0x10/0x10 ? 0xffffffffc0205254 ? inet_sendmsg+0x20/0x80 ? sock_sendmsg+0x8f/0xa0 ? __skb_send_sock+0x315/0x360 ? __pfx_sendmsg_unlocked+0x10/0x10 ? sk_psock_backlog+0xb4/0x300 ? process_one_work+0x292/0x560 ? worker_thread+0x53/0x3e0 ? __pfx_worker_thread+0x10/0x10 ? kthread+0x102/0x130 ? __pfx_kthread+0x10/0x10 ? ret_from_fork+0x34/0x50 ? __pfx_kthread+0x10/0x10 ? ret_from_fork_asm+0x1b/0x30 </TASK> The bug flow is as follows: thread 1 thread 2 sk_psock_backlog sock_close sk_psock_handle_skb __sock_release __skb_send_sock inet_release sendmsg_unlocked tcp_close sock_sendmsg lock_sock __tcp_close release_sock sock->sk = NULL // (1) inet_sendmsg sk = sock->sk // (2) inet_send_prepare inet_sk(sk)->inet_num // (3) sock->sk is set to NULL by thread 2 at time (1), then fetched by thread 1 at time (2), and used by thread 1 to access memory at time (3), resulting in NULL pointer dereference. To fix it, add lock_sock back on the egress path for sk_psock_handle_skb. Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()") Signed-off-by: Xu Kuohai <xukuohai@xxxxxxxxxx> --- net/core/skmsg.c | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 7c2764beeb04..8b758c51aa0d 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -609,15 +609,42 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb return err; } +static int sk_psock_handle_ingress_skb(struct sk_psock *psock, + struct sk_buff *skb, + u32 off, u32 len) +{ + if (sock_flag(psock->sk, SOCK_DEAD)) + return -EIO; + return sk_psock_skb_ingress(psock, skb, off, len); +} + +static int sk_psock_handle_egress_skb(struct sk_psock *psock, + struct sk_buff *skb, + u32 off, u32 len) +{ + int ret; + + lock_sock(psock->sk); + + if (sock_flag(psock->sk, SOCK_DEAD)) + ret = -EIO; + else if (!sock_writeable(psock->sk)) + ret = -EAGAIN; + else + ret = skb_send_sock_locked(psock->sk, skb, off, len); + + release_sock(psock->sk); + + return ret; +} + static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len, bool ingress) { - if (!ingress) { - if (!sock_writeable(psock->sk)) - return -EAGAIN; - return skb_send_sock(psock->sk, skb, off, len); - } - return sk_psock_skb_ingress(psock, skb, off, len); + if (ingress) + return sk_psock_handle_ingress_skb(psock, skb, off, len); + else + return sk_psock_handle_egress_skb(psock, skb, off, len); } static void sk_psock_skb_state(struct sk_psock *psock, @@ -660,10 +687,7 @@ static void sk_psock_backlog(struct work_struct *work) ingress = skb_bpf_ingress(skb); skb_bpf_redirect_clear(skb); do { - ret = -EIO; - if (!sock_flag(psock->sk, SOCK_DEAD)) - ret = sk_psock_handle_skb(psock, skb, off, - len, ingress); + ret = sk_psock_handle_skb(psock, skb, off, len, ingress); if (ret <= 0) { if (ret == -EAGAIN) { sk_psock_skb_state(psock, state, len, off); -- 2.30.2