Patch "net: implement lockless setsockopt(SO_PEEK_OFF)" has been added to the 6.7-stable tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a note to let you know that I've just added the patch titled

    net: implement lockless setsockopt(SO_PEEK_OFF)

to the 6.7-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     net-implement-lockless-setsockopt-so_peek_off.patch
and it can be found in the queue-6.7 subdirectory.

If you, or anyone else, feel it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.



commit 42ad90eb03f1162f3d9dc9d2441eaafb67762bec
Author: Eric Dumazet <edumazet@xxxxxxxxxx>
Date:   Mon Feb 19 14:12:20 2024 +0000

    net: implement lockless setsockopt(SO_PEEK_OFF)
    
    [ Upstream commit 56667da7399eb19af857e30f41bea89aa6fa812c ]
    
    syzbot reported a lockdep violation [1] involving af_unix
    support of SO_PEEK_OFF.
    
    Since SO_PEEK_OFF is inherently not thread safe (it uses a per-socket
    sk_peek_off field), there is really no point in enforcing thread
    safety for it in the kernel.
    
    After this patch:
    
    - setsockopt(SO_PEEK_OFF) no longer acquires the socket lock.
    
    - skb_consume_udp() no longer has to acquire the socket lock.
    
    - af_unix no longer needs a special version of sk_set_peek_off(),
      because it does not lock u->iolock anymore.
    
    As a followup, we could turn prot->set_peek_off into a boolean
    and avoid an indirect call, since we always use sk_set_peek_off().
    
    [1]
    
    WARNING: possible circular locking dependency detected
    6.8.0-rc4-syzkaller-00267-g0f1dd5e91e2b #0 Not tainted
    
    syz-executor.2/30025 is trying to acquire lock:
     ffff8880765e7d80 (&u->iolock){+.+.}-{3:3}, at: unix_set_peek_off+0x26/0xa0 net/unix/af_unix.c:789
    
    but task is already holding lock:
     ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1691 [inline]
     ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sockopt_lock_sock net/core/sock.c:1060 [inline]
     ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sk_setsockopt+0xe52/0x3360 net/core/sock.c:1193
    
    which lock already depends on the new lock.
    
    the existing dependency chain (in reverse order) is:
    
    -> #1 (sk_lock-AF_UNIX){+.+.}-{0:0}:
            lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754
            lock_sock_nested+0x48/0x100 net/core/sock.c:3524
            lock_sock include/net/sock.h:1691 [inline]
            __unix_dgram_recvmsg+0x1275/0x12c0 net/unix/af_unix.c:2415
            sock_recvmsg_nosec+0x18e/0x1d0 net/socket.c:1046
            ____sys_recvmsg+0x3c0/0x470 net/socket.c:2801
            ___sys_recvmsg net/socket.c:2845 [inline]
            do_recvmmsg+0x474/0xae0 net/socket.c:2939
            __sys_recvmmsg net/socket.c:3018 [inline]
            __do_sys_recvmmsg net/socket.c:3041 [inline]
            __se_sys_recvmmsg net/socket.c:3034 [inline]
            __x64_sys_recvmmsg+0x199/0x250 net/socket.c:3034
           do_syscall_64+0xf9/0x240
           entry_SYSCALL_64_after_hwframe+0x6f/0x77
    
    -> #0 (&u->iolock){+.+.}-{3:3}:
            check_prev_add kernel/locking/lockdep.c:3134 [inline]
            check_prevs_add kernel/locking/lockdep.c:3253 [inline]
            validate_chain+0x18ca/0x58e0 kernel/locking/lockdep.c:3869
            __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137
            lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754
            __mutex_lock_common kernel/locking/mutex.c:608 [inline]
            __mutex_lock+0x136/0xd70 kernel/locking/mutex.c:752
            unix_set_peek_off+0x26/0xa0 net/unix/af_unix.c:789
           sk_setsockopt+0x207e/0x3360
            do_sock_setsockopt+0x2fb/0x720 net/socket.c:2307
            __sys_setsockopt+0x1ad/0x250 net/socket.c:2334
            __do_sys_setsockopt net/socket.c:2343 [inline]
            __se_sys_setsockopt net/socket.c:2340 [inline]
            __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340
           do_syscall_64+0xf9/0x240
           entry_SYSCALL_64_after_hwframe+0x6f/0x77
    
    other info that might help us debug this:
    
     Possible unsafe locking scenario:
    
           CPU0                    CPU1
           ----                    ----
      lock(sk_lock-AF_UNIX);
                                   lock(&u->iolock);
                                   lock(sk_lock-AF_UNIX);
      lock(&u->iolock);
    
     *** DEADLOCK ***
    
    1 lock held by syz-executor.2/30025:
      #0: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1691 [inline]
      #0: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sockopt_lock_sock net/core/sock.c:1060 [inline]
      #0: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sk_setsockopt+0xe52/0x3360 net/core/sock.c:1193
    
    stack backtrace:
    CPU: 0 PID: 30025 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-00267-g0f1dd5e91e2b #0
    Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024
    Call Trace:
     <TASK>
      __dump_stack lib/dump_stack.c:88 [inline]
      dump_stack_lvl+0x1e7/0x2e0 lib/dump_stack.c:106
      check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2187
      check_prev_add kernel/locking/lockdep.c:3134 [inline]
      check_prevs_add kernel/locking/lockdep.c:3253 [inline]
      validate_chain+0x18ca/0x58e0 kernel/locking/lockdep.c:3869
      __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137
      lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754
      __mutex_lock_common kernel/locking/mutex.c:608 [inline]
      __mutex_lock+0x136/0xd70 kernel/locking/mutex.c:752
      unix_set_peek_off+0x26/0xa0 net/unix/af_unix.c:789
     sk_setsockopt+0x207e/0x3360
      do_sock_setsockopt+0x2fb/0x720 net/socket.c:2307
      __sys_setsockopt+0x1ad/0x250 net/socket.c:2334
      __do_sys_setsockopt net/socket.c:2343 [inline]
      __se_sys_setsockopt net/socket.c:2340 [inline]
      __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340
     do_syscall_64+0xf9/0x240
     entry_SYSCALL_64_after_hwframe+0x6f/0x77
    RIP: 0033:0x7f78a1c7dda9
    Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
    RSP: 002b:00007f78a0fde0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036
    RAX: ffffffffffffffda RBX: 00007f78a1dac050 RCX: 00007f78a1c7dda9
    RDX: 000000000000002a RSI: 0000000000000001 RDI: 0000000000000006
    RBP: 00007f78a1cca47a R08: 0000000000000004 R09: 0000000000000000
    R10: 0000000020000180 R11: 0000000000000246 R12: 0000000000000000
    R13: 000000000000006e R14: 00007f78a1dac050 R15: 00007ffe5cd81ae8
    
    Fixes: 859051dd165e ("bpf: Implement cgroup sockaddr hooks for unix sockets")
    Signed-off-by: Eric Dumazet <edumazet@xxxxxxxxxx>
    Cc: Willem de Bruijn <willemdebruijn.kernel@xxxxxxxxx>
    Cc: Daan De Meyer <daan.j.demeyer@xxxxxxxxx>
    Cc: Kuniyuki Iwashima <kuniyu@xxxxxxxxxx>
    Cc: Martin KaFai Lau <martin.lau@xxxxxxxxxx>
    Cc: David Ahern <dsahern@xxxxxxxxxx>
    Reviewed-by: Willem de Bruijn <willemb@xxxxxxxxxx>
    Reviewed-by: Kuniyuki Iwashima <kuniyu@xxxxxxxxxx>
    Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
    Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>

diff --git a/net/core/sock.c b/net/core/sock.c
index e5d43a068f8ed..20160865ede9c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1192,6 +1192,17 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
 		 */
 		WRITE_ONCE(sk->sk_txrehash, (u8)val);
 		return 0;
+	case SO_PEEK_OFF:
+		{
+		int (*set_peek_off)(struct sock *sk, int val);
+
+		set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
+		if (set_peek_off)
+			ret = set_peek_off(sk, val);
+		else
+			ret = -EOPNOTSUPP;
+		return ret;
+		}
 	}
 
 	sockopt_lock_sock(sk);
@@ -1434,18 +1445,6 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
 		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
 		break;
 
-	case SO_PEEK_OFF:
-		{
-		int (*set_peek_off)(struct sock *sk, int val);
-
-		set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
-		if (set_peek_off)
-			ret = set_peek_off(sk, val);
-		else
-			ret = -EOPNOTSUPP;
-		break;
-		}
-
 	case SO_NOFCS:
 		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
 		break;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f631b0a21af4c..e474b201900f9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1589,12 +1589,7 @@ int udp_init_sock(struct sock *sk)
 
 void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
 {
-	if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) {
-		bool slow = lock_sock_fast(sk);
-
-		sk_peek_offset_bwd(sk, len);
-		unlock_sock_fast(sk, slow);
-	}
+	sk_peek_offset_bwd(sk, len);
 
 	if (!skb_unref(skb))
 		return;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 30b178ebba60a..0748e7ea5210e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -782,19 +782,6 @@ static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 				  int);
 
-static int unix_set_peek_off(struct sock *sk, int val)
-{
-	struct unix_sock *u = unix_sk(sk);
-
-	if (mutex_lock_interruptible(&u->iolock))
-		return -EINTR;
-
-	WRITE_ONCE(sk->sk_peek_off, val);
-	mutex_unlock(&u->iolock);
-
-	return 0;
-}
-
 #ifdef CONFIG_PROC_FS
 static int unix_count_nr_fds(struct sock *sk)
 {
@@ -862,7 +849,7 @@ static const struct proto_ops unix_stream_ops = {
 	.read_skb =	unix_stream_read_skb,
 	.mmap =		sock_no_mmap,
 	.splice_read =	unix_stream_splice_read,
-	.set_peek_off =	unix_set_peek_off,
+	.set_peek_off =	sk_set_peek_off,
 	.show_fdinfo =	unix_show_fdinfo,
 };
 
@@ -886,7 +873,7 @@ static const struct proto_ops unix_dgram_ops = {
 	.read_skb =	unix_read_skb,
 	.recvmsg =	unix_dgram_recvmsg,
 	.mmap =		sock_no_mmap,
-	.set_peek_off =	unix_set_peek_off,
+	.set_peek_off =	sk_set_peek_off,
 	.show_fdinfo =	unix_show_fdinfo,
 };
 
@@ -909,7 +896,7 @@ static const struct proto_ops unix_seqpacket_ops = {
 	.sendmsg =	unix_seqpacket_sendmsg,
 	.recvmsg =	unix_seqpacket_recvmsg,
 	.mmap =		sock_no_mmap,
-	.set_peek_off =	unix_set_peek_off,
+	.set_peek_off =	sk_set_peek_off,
 	.show_fdinfo =	unix_show_fdinfo,
 };
 




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux