From: Martin KaFai Lau <martin.lau@xxxxxxxxx> Date: Wed, 22 Nov 2023 15:19:29 -0800 > On 11/21/23 10:42 AM, Kuniyuki Iwashima wrote: > > diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h > > index 533a7337865a..9a67f47a5e64 100644 > > --- a/include/net/inet6_hashtables.h > > +++ b/include/net/inet6_hashtables.h > > @@ -116,9 +116,23 @@ struct sock *inet6_steal_sock(struct net *net, struct sk_buff *skb, int doff, > > if (!sk) > > return NULL; > > > > - if (!prefetched || !sk_fullsock(sk)) > > + if (!prefetched) > > return sk; > > > > + if (sk->sk_state == TCP_NEW_SYN_RECV) { > > +#if IS_ENABLED(CONFIG_SYN_COOKIE) > > + if (inet_reqsk(sk)->syncookie) { > > + *refcounted = false; > > + skb->sk = sk; > > + skb->destructor = sock_pfree; > Instead of re-init the skb->sk and skb->destructor, can skb_steal_sock() avoid > resetting them to NULL in the first place and skb_steal_sock() returns the > rsk_listener instead? Yes, but we need to move skb_steal_sock() to request_sock.h or include it just before skb_steal_sock() in sock.h like below. When I included request_sock.h at the top of sock.h, there were many build errors. > btw, can inet_reqsk(sk)->rsk_listener be set to NULL after > this point? Except for sock_pfree(), we will not set it to NULL until cookie_bpf_check(). > Beside, it is essentially assigning the incoming request to a listening sk. Does > it need to call the inet6_lookup_reuseport() a few lines below to avoid skipping > the bpf reuseport selection that was fixed in commit 9c02bec95954 ("bpf, net: > Support SO_REUSEPORT sockets with bpf_sk_assign")? Ah, good point. I assumed bpf_sk_lookup() would do the random pick, but we need to call it just in case sk is picked from a bpf map. As you suggested, if we return rsk_listener from skb_steal_sock(), we can reuse the reuseport_lookup call in inet6_steal_sock(). Thanks! 
---8<--- diff --git a/include/net/sock.h b/include/net/sock.h index 1d6931caf0c3..83efbe0e7c3b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2838,6 +2838,8 @@ sk_is_refcounted(struct sock *sk) return !sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE); } +#include <net/request_sock.h> + /** * skb_steal_sock - steal a socket from an sk_buff * @skb: sk_buff to steal the socket from @@ -2847,20 +2849,38 @@ sk_is_refcounted(struct sock *sk) static inline struct sock * skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched) { - if (skb->sk) { - struct sock *sk = skb->sk; + struct sock *sk = skb->sk; + if (!sk) { + *prefetched = false; + *refcounted = false; + return NULL; + } + + *prefetched = skb_sk_is_prefetched(skb); + if (!*prefetched) { *refcounted = true; - *prefetched = skb_sk_is_prefetched(skb); - if (*prefetched) - *refcounted = sk_is_refcounted(sk); - skb->destructor = NULL; - skb->sk = NULL; - return sk; + goto out; } - *prefetched = false; - *refcounted = false; - return NULL; + + switch (sk->sk_state) { + case TCP_NEW_SYN_RECV: + if (inet_reqsk(sk)->syncookie) { + *refcounted = false; + return inet_reqsk(sk)->rsk_listener; + } + fallthrough; + case TCP_TIME_WAIT: + *refcounted = true; + break; + default: + *refcounted = !sock_flag(sk, SOCK_RCU_FREE); + } + +out: + skb->destructor = NULL; + skb->sk = NULL; + return sk; } /* Checks if this SKB belongs to an HW offloaded socket ---8<---