From: Martin KaFai Lau <kafai@xxxxxx> Date: Tue, 4 May 2021 21:56:18 -0700 > On Tue, Apr 27, 2021 at 12:46:18PM +0900, Kuniyuki Iwashima wrote: > [ ... ] > > > diff --git a/net/core/request_sock.c b/net/core/request_sock.c > > index 82cf9fbe2668..08c37ecd923b 100644 > > --- a/net/core/request_sock.c > > +++ b/net/core/request_sock.c > > @@ -151,6 +151,7 @@ struct request_sock *reqsk_clone(struct request_sock *req, struct sock *sk) > > memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end, > > req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end)); > > > > + sk_node_init(&nreq_sk->sk_node); > This belongs to patch 5. > "rsk_refcnt" also needs to be 0 instead of staying uninitialized > after reqsk_clone() returned. I'll move this part to patch 5 and initialize refcnt as 0 in reqsk_clone() like reqsk_alloc(). > > > nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping; > > #ifdef CONFIG_XPS > > nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping; > > diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c > > index 851992405826..dc984d1f352e 100644 > > --- a/net/ipv4/inet_connection_sock.c > > +++ b/net/ipv4/inet_connection_sock.c > > @@ -695,10 +695,20 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req) > > } > > EXPORT_SYMBOL(inet_rtx_syn_ack); > > > > +static void reqsk_queue_migrated(struct request_sock_queue *queue, > > + const struct request_sock *req) > > +{ > > + if (req->num_timeout == 0) > > + atomic_inc(&queue->young); > > + atomic_inc(&queue->qlen); > > +} > > + > > static void reqsk_migrate_reset(struct request_sock *req) > > { > > + req->saved_syn = NULL; > > + inet_rsk(req)->ireq_opt = NULL; > > #if IS_ENABLED(CONFIG_IPV6) > > - inet_rsk(req)->ipv6_opt = NULL; > > + inet_rsk(req)->pktopts = NULL; > > #endif > > } > > > > @@ -741,16 +751,37 @@ EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put); > > > > static void reqsk_timer_handler(struct timer_list *t) > > { > > - struct 
request_sock *req = from_timer(req, t, rsk_timer); > > - struct sock *sk_listener = req->rsk_listener; > > - struct net *net = sock_net(sk_listener); > > - struct inet_connection_sock *icsk = inet_csk(sk_listener); > > - struct request_sock_queue *queue = &icsk->icsk_accept_queue; > > + struct request_sock *req = from_timer(req, t, rsk_timer), *nreq = NULL, *oreq = req; > nit. This line is too long. > Let's move the new "*nreq" and "*oreq" to a new line and keep the current > "*req" line as is: > struct request_sock *req = from_timer(req, t, rsk_timer); > struct request_sock *oreq = req, *nreq = NULL; I'll fix that. > > > + struct sock *sk_listener = req->rsk_listener, *nsk = NULL; > "*nsk" can be moved into the following "!= TCP_LISTEN" case below. > Keep the current "*sk_listener" line as is. I'll move the nsk's definition. Thank you. > > > + struct inet_connection_sock *icsk; > > + struct request_sock_queue *queue; > > + struct net *net; > > int max_syn_ack_retries, qlen, expire = 0, resend = 0; > > > > - if (inet_sk_state_load(sk_listener) != TCP_LISTEN) > > - goto drop; > > + if (inet_sk_state_load(sk_listener) != TCP_LISTEN) { > > struct sock *nsk; > > > + nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL); > > + if (!nsk) > > + goto drop; > > + > > + nreq = reqsk_clone(req, nsk); > > + if (!nreq) > > + goto drop; > > + > > + /* The new timer for the cloned req can decrease the 2 > > + * by calling inet_csk_reqsk_queue_drop_and_put(), so > > + * hold another count to prevent use-after-free and > > + * call reqsk_put() just before return. > > + */ > > + refcount_set(&nreq->rsk_refcnt, 2 + 1); > > + timer_setup(&nreq->rsk_timer, reqsk_timer_handler, TIMER_PINNED); > > + reqsk_queue_migrated(&inet_csk(nsk)->icsk_accept_queue, req); > > + > > + req = nreq; > > + sk_listener = nsk; > > + }