From: Petar Penkov <ppenkov@xxxxxxxxxx> This patch allows generation of a SYN cookie before an SKB has been allocated, as is the case at XDP. Signed-off-by: Petar Penkov <ppenkov@xxxxxxxxxx> --- include/net/tcp.h | 11 +++++++ net/ipv4/tcp_input.c | 76 ++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 8 +++++ net/ipv6/tcp_ipv6.c | 8 +++++ 4 files changed, 103 insertions(+) diff --git a/include/net/tcp.h b/include/net/tcp.h index cca3c59b98bf..a128e22c0d5d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -414,6 +414,17 @@ void tcp_parse_options(const struct net *net, const struct sk_buff *skb, int estab, struct tcp_fastopen_cookie *foc); const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); +/* + * BPF SKB-less helpers + */ +u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, + struct tcphdr *tch, u32 *cookie); +u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, + struct tcphdr *tch, u32 *cookie); +u16 tcp_get_syncookie(struct request_sock_ops *rsk_ops, + const struct tcp_request_sock_ops *af_ops, + struct sock *sk, void *iph, struct tcphdr *tch, + u32 *cookie); /* * TCP v4 functions exported for the inet6 API */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8892df6de1d4..893b275a6d49 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3782,6 +3782,49 @@ static void smc_parse_options(const struct tcphdr *th, #endif } +/* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped + * value on success. + */ +static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) +{ + const unsigned char *ptr = (const unsigned char *)(th + 1); + int length = (th->doff * 4) - sizeof(struct tcphdr); + u16 mss = 0; + + while (length > 0) { + int opcode = *ptr++; + int opsize; + + switch (opcode) { + case TCPOPT_EOL: + return mss; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + continue; + default: + if (length < 2) + return mss; + opsize = *ptr++; + if (opsize < 2) /* "silly options" */ + return mss; + if (opsize > length) + return mss; /* fail on partial options */ + if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) { + u16 in_mss = get_unaligned_be16(ptr); + + if (in_mss) { + if (user_mss && user_mss < in_mss) + in_mss = user_mss; + mss = in_mss; + } + } + ptr += opsize - 2; + length -= opsize; + } + } + return mss; +} + /* Look for tcp options. Normally only called on SYN and SYNACK packets. * But, this can also be called on packets in the established flow when * the fast version below fails. @@ -6464,6 +6507,39 @@ static void tcp_reqsk_record_syn(const struct sock *sk, } } +u16 tcp_get_syncookie(struct request_sock_ops *rsk_ops, + const struct tcp_request_sock_ops *af_ops, + struct sock *sk, void *iph, struct tcphdr *th, + u32 *cookie) +{ + u16 mss = 0; +#ifdef CONFIG_SYN_COOKIES + bool is_v4 = rsk_ops->family == AF_INET; + struct tcp_sock *tp = tcp_sk(sk); + + if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 && + !inet_csk_reqsk_queue_is_full(sk)) + return 0; + + if (!tcp_syn_flood_action(sk, rsk_ops->slab_name)) + return 0; + + if (sk_acceptq_is_full(sk)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + return 0; + } + + mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss); + if (!mss) + mss = af_ops->mss_clamp; + + tcp_synq_overflow(sk); + *cookie = is_v4 ? __cookie_v4_init_sequence(iph, th, &mss) + : __cookie_v6_init_sequence(iph, th, &mss); +#endif + return mss; +} + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d57641cb3477..0e06e59784bd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1515,6 +1515,14 @@ static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) return sk; } +u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, + struct tcphdr *tch, u32 *cookie) +{ + return tcp_get_syncookie(&tcp_request_sock_ops, + &tcp_request_sock_ipv4_ops, sk, iph, tch, + cookie); +} + /* The socket must have it's spinlock held when we get * here, unless it is a TCP_LISTEN socket. * diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5da069e91cac..102f68c3152d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1063,6 +1063,14 @@ static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) return sk; } +u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, + struct tcphdr *tch, u32 *cookie) +{ + return tcp_get_syncookie(&tcp6_request_sock_ops, + &tcp_request_sock_ipv6_ops, sk, iph, tch, + cookie); +} + static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { if (skb->protocol == htons(ETH_P_IP)) -- 2.22.0.657.g960e92d24f-goog