> What happens if two sockets matching each other reach here?

Not sure. In general, an attempt to use a rather external thing
(struct sctp_hashbucket) to implement extra synchronization for the
reuseport innards looks somewhat weird. So let's look at
reuseport_add_sock() again. Note the extra comments:

int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
{
	struct sock_reuseport *old_reuse, *reuse;

	/* RCU access _outside_ of reuseport_lock'ed section */
	if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
		int err = reuseport_alloc(sk2, bind_inany);

		if (err)
			return err;
	}

	spin_lock_bh(&reuseport_lock);
	reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
					  lockdep_is_held(&reuseport_lock));
	old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
					      lockdep_is_held(&reuseport_lock));
	if (old_reuse && old_reuse->num_closed_socks) {
		/* sk was shutdown()ed before */
		int err = reuseport_resurrect(sk, old_reuse, reuse,
					      reuse->bind_inany);

		spin_unlock_bh(&reuseport_lock);
		return err;
	}

	if (old_reuse && old_reuse->num_socks != 1) {
		spin_unlock_bh(&reuseport_lock);
		return -EBUSY;
	}

	if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
		reuse = reuseport_grow(reuse);
		if (!reuse) {
			spin_unlock_bh(&reuseport_lock);
			return -ENOMEM;
		}
	}

	__reuseport_add_sock(sk, reuse);
	/* RCU access _inside_ of reuseport_lock'ed section */
	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);

	spin_unlock_bh(&reuseport_lock);

	if (old_reuse)
		call_rcu(&old_reuse->rcu, reuseport_free_rcu);
	return 0;
}

Why is this so?
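To make the window concrete, here is a minimal userspace model of the
pattern (not kernel code, purely illustrative: a pthread mutex stands
in for reuseport_lock, a bare pointer load for rcu_access_pointer(),
and the names cb, cb_lock and add_sock are made up):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t cb_lock = PTHREAD_MUTEX_INITIALIZER;
static void *cb;	/* stands in for sk2->sk_reuseport_cb */

static void *add_sock(void *arg)
{
	(void)arg;

	/* Unlocked check, as in reuseport_add_sock() above: two threads
	 * may both observe cb == NULL here and both take the alloc path.
	 */
	if (!cb) {
		/* Models reuseport_alloc(): it takes the lock itself and
		 * rechecks, so the loser of the race simply backs off.
		 */
		pthread_mutex_lock(&cb_lock);
		if (!cb)
			cb = calloc(1, 64);
		pthread_mutex_unlock(&cb_lock);
	}

	/* The gap between dropping the lock above and re-taking it here
	 * is the window in question: nothing serializes a second
	 * matching socket against this one inside that gap.
	 */
	pthread_mutex_lock(&cb_lock);
	/* ... rcu_dereference_protected() and the attach logic ... */
	pthread_mutex_unlock(&cb_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, add_sock, NULL);
	pthread_create(&b, NULL, add_sock, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

The allocation itself is safe thanks to the recheck under the lock,
but the check and the attach do not share one critical section.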
I've tried extending the reuseport_lock'ed section to cover the first
rcu_access_pointer() above as well, e.g.:

diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 5a165286e4d8..39a353ab8ff8 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -186,16 +186,11 @@ static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
 	return reuse;
 }
 
-int reuseport_alloc(struct sock *sk, bool bind_inany)
+static int reuseport_alloc_unlocked(struct sock *sk, bool bind_inany)
 {
 	struct sock_reuseport *reuse;
 	int id, ret = 0;
 
-	/* bh lock used since this function call may precede hlist lock in
-	 * soft irq of receive path or setsockopt from process context
-	 */
-	spin_lock_bh(&reuseport_lock);
-
 	/* Allocation attempts can occur concurrently via the setsockopt path
 	 * and the bind/hash path. Nothing to do when we lose the race.
 	 */
@@ -236,8 +231,19 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
 	reuse->num_socks = 1;
 	reuseport_get_incoming_cpu(sk, reuse);
 	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
-
 out:
+	return ret;
+}
+
+int reuseport_alloc(struct sock *sk, bool bind_inany)
+{
+	int ret;
+
+	/* bh lock used since this function call may precede hlist lock in
+	 * soft irq of receive path or setsockopt from process context
+	 */
+	spin_lock_bh(&reuseport_lock);
+	ret = reuseport_alloc_unlocked(sk, bind_inany);
 	spin_unlock_bh(&reuseport_lock);
 
 	return ret;
@@ -322,14 +328,17 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
 {
 	struct sock_reuseport *old_reuse, *reuse;
 
+	spin_lock_bh(&reuseport_lock);
+
 	if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
-		int err = reuseport_alloc(sk2, bind_inany);
+		int err = reuseport_alloc_unlocked(sk2, bind_inany);
 
-		if (err)
+		if (err) {
+			spin_unlock_bh(&reuseport_lock);
 			return err;
+		}
 	}
 
-	spin_lock_bh(&reuseport_lock);
 	reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
 					  lockdep_is_held(&reuseport_lock));
 	old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,

and this seems to fix the original crash as well.
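In the same toy terms as before (still not kernel code; this reuses cb
and cb_lock from the sketch above, and alloc_cb, alloc_cb_unlocked and
add_sock_fixed are made-up names), the patched structure looks like:

static int alloc_cb_unlocked(void)	/* ~ reuseport_alloc_unlocked() */
{
	if (!cb)	/* the recheck now runs under the caller's lock */
		cb = calloc(1, 64);
	return cb ? 0 : -1;		/* ~ -ENOMEM */
}

/* Locked wrapper kept so the other reuseport_alloc() callers (the
 * bind/hash and setsockopt paths) are unchanged.
 */
static int alloc_cb(void)
{
	int ret;

	pthread_mutex_lock(&cb_lock);
	ret = alloc_cb_unlocked();
	pthread_mutex_unlock(&cb_lock);
	return ret;
}

static void *add_sock_fixed(void *arg)
{
	(void)arg;

	pthread_mutex_lock(&cb_lock);
	if (!cb && alloc_cb_unlocked() < 0) {
		pthread_mutex_unlock(&cb_lock);
		return NULL;		/* allocation failed */
	}
	/* ... attach logic, now inside the same critical section as the
	 * NULL check, so a second matching socket cannot interleave ...
	 */
	pthread_mutex_unlock(&cb_lock);
	return NULL;
}

That is, only reuseport_add_sock() switches to the unlocked helper;
the check and the attach now share one critical section.

Dmitry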