On Tue, Dec 01, 2020 at 11:44:12PM +0900, Kuniyuki Iwashima wrote: > This patch renames reuseport_select_sock() to __reuseport_select_sock() and > adds two wrapper function of it to pass the migration type defined in the > previous commit. > > reuseport_select_sock : BPF_SK_REUSEPORT_MIGRATE_NO > reuseport_select_migrated_sock : BPF_SK_REUSEPORT_MIGRATE_REQUEST > > As mentioned before, we have to select a new listener for TCP_NEW_SYN_RECV > requests at receiving the final ACK or sending a SYN+ACK. Therefore, this > patch also changes the code to call reuseport_select_migrated_sock() even > if the listening socket is TCP_CLOSE. If we can pick out a listening socket > from the reuseport group, we rewrite request_sock.rsk_listener and resume > processing the request. > > Reviewed-by: Benjamin Herrenschmidt <benh@xxxxxxxxxx> > Signed-off-by: Kuniyuki Iwashima <kuniyu@xxxxxxxxxxxx> > --- > include/net/inet_connection_sock.h | 12 +++++++++++ > include/net/request_sock.h | 13 ++++++++++++ > include/net/sock_reuseport.h | 8 +++---- > net/core/sock_reuseport.c | 34 ++++++++++++++++++++++++------ > net/ipv4/inet_connection_sock.c | 13 ++++++++++-- > net/ipv4/tcp_ipv4.c | 9 ++++++-- > net/ipv6/tcp_ipv6.c | 9 ++++++-- > 7 files changed, 81 insertions(+), 17 deletions(-) > > diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h > index 2ea2d743f8fc..1e0958f5eb21 100644 > --- a/include/net/inet_connection_sock.h > +++ b/include/net/inet_connection_sock.h > @@ -272,6 +272,18 @@ static inline void inet_csk_reqsk_queue_added(struct sock *sk) > reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue); > } > > +static inline void inet_csk_reqsk_queue_migrated(struct sock *sk, > + struct sock *nsk, > + struct request_sock *req) > +{ > + reqsk_queue_migrated(&inet_csk(sk)->icsk_accept_queue, > + &inet_csk(nsk)->icsk_accept_queue, > + req); > + sock_put(sk); I am not sure it is safe to do this sock_put(sk) here.
IIUC, when the req->rsk_refcnt is held, it also holds a refcnt to req->rsk_listener such that sock_hold(req->rsk_listener) is safe because its sk_refcnt is not zero. > + sock_hold(nsk); > + req->rsk_listener = nsk; > +} > + [ ... ] > diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c > index 361efe55b1ad..e71653c6eae2 100644 > --- a/net/ipv4/inet_connection_sock.c > +++ b/net/ipv4/inet_connection_sock.c > @@ -743,8 +743,17 @@ static void reqsk_timer_handler(struct timer_list *t) > struct request_sock_queue *queue = &icsk->icsk_accept_queue; > int max_syn_ack_retries, qlen, expire = 0, resend = 0; > > - if (inet_sk_state_load(sk_listener) != TCP_LISTEN) > - goto drop; > + if (inet_sk_state_load(sk_listener) != TCP_LISTEN) { > + sk_listener = reuseport_select_migrated_sock(sk_listener, > + req_to_sk(req)->sk_hash, NULL); > + if (!sk_listener) { > + sk_listener = req->rsk_listener; > + goto drop; > + } > + inet_csk_reqsk_queue_migrated(req->rsk_listener, sk_listener, req); > + icsk = inet_csk(sk_listener); > + queue = &icsk->icsk_accept_queue; > + } > > max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; > /* Normally all the openreqs are young and become mature > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c > index e4b31e70bd30..9a9aa27c6069 100644 > --- a/net/ipv4/tcp_ipv4.c > +++ b/net/ipv4/tcp_ipv4.c > @@ -1973,8 +1973,13 @@ int tcp_v4_rcv(struct sk_buff *skb) > goto csum_error; > } > if (unlikely(sk->sk_state != TCP_LISTEN)) { > - inet_csk_reqsk_queue_drop_and_put(sk, req); > - goto lookup; > + nsk = reuseport_select_migrated_sock(sk, req_to_sk(req)->sk_hash, skb); > + if (!nsk) { > + inet_csk_reqsk_queue_drop_and_put(sk, req); > + goto lookup; > + } > + inet_csk_reqsk_queue_migrated(sk, nsk, req); > + sk = nsk; > } > /* We own a reference on the listener, increase it again > * as we might lose it too soon. 
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c > index 992cbf3eb9e3..ff11f3c0cb96 100644 > --- a/net/ipv6/tcp_ipv6.c > +++ b/net/ipv6/tcp_ipv6.c > @@ -1635,8 +1635,13 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) > goto csum_error; > } > if (unlikely(sk->sk_state != TCP_LISTEN)) { > - inet_csk_reqsk_queue_drop_and_put(sk, req); > - goto lookup; > + nsk = reuseport_select_migrated_sock(sk, req_to_sk(req)->sk_hash, skb); > + if (!nsk) { > + inet_csk_reqsk_queue_drop_and_put(sk, req); > + goto lookup; > + } > + inet_csk_reqsk_queue_migrated(sk, nsk, req); > + sk = nsk; > } > sock_hold(sk); For example, take this sock_hold(sk): sk here is req->rsk_listener. > refcounted = true; > -- > 2.17.2 (Apple Git-113) >