On 12/1/20 3:44 PM, Kuniyuki Iwashima wrote: > A TFO request socket is only freed after BOTH 3WHS has completed (or > aborted) and the child socket has been accepted (or its listener has been > closed). Hence, depending on the order, there can be two kinds of request > sockets in the accept queue. > > 3WHS -> accept : TCP_ESTABLISHED > accept -> 3WHS : TCP_SYN_RECV > > Unlike TCP_ESTABLISHED socket, accept() does not free the request socket > for TCP_SYN_RECV socket. It is freed later at reqsk_fastopen_remove(). > Also, it accesses request_sock.rsk_listener. So, in order to complete TFO > socket migration, we have to set the current listener to it at accept() > before reqsk_fastopen_remove(). > > Moreover, if TFO request caused RST before 3WHS has completed, it is held > in the listener's TFO queue to prevent DDoS attack. Thus, we also have to > migrate the requests in TFO queue. > > Reviewed-by: Benjamin Herrenschmidt <benh@xxxxxxxxxx> > Signed-off-by: Kuniyuki Iwashima <kuniyu@xxxxxxxxxxxx> > --- > net/ipv4/inet_connection_sock.c | 35 ++++++++++++++++++++++++++++++++- > 1 file changed, 34 insertions(+), 1 deletion(-) > > diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c > index b27241ea96bd..361efe55b1ad 100644 > --- a/net/ipv4/inet_connection_sock.c > +++ b/net/ipv4/inet_connection_sock.c > @@ -500,6 +500,16 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) > tcp_rsk(req)->tfo_listener) { > spin_lock_bh(&queue->fastopenq.lock); > if (tcp_rsk(req)->tfo_listener) { > + if (req->rsk_listener != sk) { > + /* TFO request was migrated to another listener so > + * the new listener must be used in reqsk_fastopen_remove() > + * to hold requests which cause RST. > + */ > + sock_put(req->rsk_listener); > + sock_hold(sk); > + req->rsk_listener = sk; > + } > + > /* We are still waiting for the final ACK from 3WHS > * so can't free req now. 
Instead, we set req->sk to
> NULL to signify that the child socket is taken
> @@ -954,7 +964,6 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
>
> if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
> BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
> - BUG_ON(sk != req->rsk_listener);
>
> /* Paranoid, to prevent race condition if
> * an inbound pkt destined for child is
> @@ -995,6 +1004,7 @@ EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
> void inet_csk_reqsk_queue_migrate(struct sock *sk, struct sock *nsk)
> {
> struct request_sock_queue *old_accept_queue, *new_accept_queue;
> + struct fastopen_queue *old_fastopenq, *new_fastopenq;
>
> old_accept_queue = &inet_csk(sk)->icsk_accept_queue;
> new_accept_queue = &inet_csk(nsk)->icsk_accept_queue;
> @@ -1019,6 +1029,29 @@ void inet_csk_reqsk_queue_migrate(struct sock *sk, struct sock *nsk)
>
> spin_unlock(&new_accept_queue->rskq_lock);
> spin_unlock(&old_accept_queue->rskq_lock);
> +
> + old_fastopenq = &old_accept_queue->fastopenq;
> + new_fastopenq = &new_accept_queue->fastopenq;
> +
> + spin_lock_bh(&old_fastopenq->lock);
> + spin_lock_bh(&new_fastopenq->lock);

Same remark as for the rskq_lock pair above: taking two spinlocks of the same
lock class nested like this will trigger a lockdep splat unless you use a
spin_lock_bh_nested()-style annotation or establish a global lock ordering.
And since the locks are always taken old-then-new, two migrations running
concurrently in opposite directions would be an ABBA deadlock.

> +
> + new_fastopenq->qlen += old_fastopenq->qlen;
> + old_fastopenq->qlen = 0;
> +
> + if (old_fastopenq->rskq_rst_head) {
> + if (new_fastopenq->rskq_rst_head)
> + old_fastopenq->rskq_rst_tail->dl_next = new_fastopenq->rskq_rst_head;
> + else
> + old_fastopenq->rskq_rst_tail = new_fastopenq->rskq_rst_tail;

This else branch looks wrong: when the new queue's rst list is empty, the new
queue must take over the old queue's tail, i.e.

	new_fastopenq->rskq_rst_tail = old_fastopenq->rskq_rst_tail;

As written, new_fastopenq ends up with a non-NULL rskq_rst_head but a stale
rskq_rst_tail, and old_fastopenq's tail is clobbered for nothing (it is reset
to NULL just below anyway).

> +
> + new_fastopenq->rskq_rst_head = old_fastopenq->rskq_rst_head;
> + old_fastopenq->rskq_rst_head = NULL;
> + old_fastopenq->rskq_rst_tail = NULL;
> + }
> +
> + spin_unlock_bh(&new_fastopenq->lock);
> + spin_unlock_bh(&old_fastopenq->lock);
> }
> EXPORT_SYMBOL(inet_csk_reqsk_queue_migrate);
>
>