On 10/19/21 7:46 AM, Maxim Mikityanskiy wrote: > The new helper bpf_tcp_raw_gen_tscookie allows an XDP program to > generate timestamp cookies (to be used together with SYN cookies) which > encode different options set by the client in the SYN packet: SACK > support, ECN support, window scale. These options are encoded in lower > bits of the timestamp, which will be returned by the client in a > subsequent ACK packet. The format is the same used by synproxy. > > Signed-off-by: Maxim Mikityanskiy <maximmi@xxxxxxxxxx> > Reviewed-by: Tariq Toukan <tariqt@xxxxxxxxxx> > --- > include/net/tcp.h | 1 + > include/uapi/linux/bpf.h | 27 +++++++++++++++ > net/core/filter.c | 38 +++++++++++++++++++++ > net/ipv4/syncookies.c | 60 ++++++++++++++++++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 27 +++++++++++++++ > 5 files changed, 153 insertions(+) > > diff --git a/include/net/tcp.h b/include/net/tcp.h > index 1cc96a225848..651820bef6a2 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -564,6 +564,7 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, > u16 *mssp); > __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss); > u64 cookie_init_timestamp(struct request_sock *req, u64 now); > +bool cookie_init_timestamp_raw(struct tcphdr *th, __be32 *tsval, __be32 *tsecr); > bool cookie_timestamp_decode(const struct net *net, > struct tcp_options_received *opt); > bool cookie_ecn_ok(const struct tcp_options_received *opt, > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index e32f72077250..791790b41874 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -5053,6 +5053,32 @@ union bpf_attr { > * > * **-EPROTONOSUPPORT** if the IP version is not 4 or 6 (or 6, but > * CONFIG_IPV6 is disabled). 
> + * > + * int bpf_tcp_raw_gen_tscookie(struct tcphdr *th, u32 th_len, __be32 *tsopt, u32 tsopt_len) > + * Description > + * Try to generate a timestamp cookie which encodes some of the > + * flags sent by the client in the SYN packet: SACK support, ECN > + * support, window scale. To be used with SYN cookies. > + * > + * *th* points to the start of the TCP header of the client's SYN > + * packet, while *th_len* contains the length of the TCP header (at > + * least **sizeof**\ (**struct tcphdr**)). > + * > + * *tsopt* points to the output location where to put the resulting > + * timestamp values: tsval and tsecr, in the format of the TCP > + * timestamp option. > + * > + * Return > + * On success, 0. > + * > + * On failure, the returned value is one of the following: > + * > + * **-EINVAL** if the input arguments are invalid. > + * > + * **-ENOENT** if the TCP header doesn't have the timestamp option. > + * > + * **-EOPNOTSUPP** if the kernel configuration does not enable SYN > + * cookies (CONFIG_SYN_COOKIES is off). 
> */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -5238,6 +5264,7 @@ union bpf_attr { > FN(ct_release), \ > FN(tcp_raw_gen_syncookie), \ > FN(tcp_raw_check_syncookie), \ > + FN(tcp_raw_gen_tscookie), \ > /* */ > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > diff --git a/net/core/filter.c b/net/core/filter.c > index 5f03d4a282a0..73fe20ef7442 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -7403,6 +7403,42 @@ static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_proto = { > .arg4_type = ARG_CONST_SIZE, > }; > > +BPF_CALL_4(bpf_tcp_raw_gen_tscookie, struct tcphdr *, th, u32, th_len, > + __be32 *, tsopt, u32, tsopt_len) > +{ > + int err; > + > +#ifdef CONFIG_SYN_COOKIES > + if (tsopt_len != sizeof(u64)) { > + err = -EINVAL; > + goto err_out; > + } > + > + if (!cookie_init_timestamp_raw(th, &tsopt[0], &tsopt[1])) { > + err = -ENOENT; > + goto err_out; > + } > + > + return 0; > +err_out: > +#else > + err = -EOPNOTSUPP; > +#endif > + memset(tsopt, 0, tsopt_len); > + return err; > +} > + > +static const struct bpf_func_proto bpf_tcp_raw_gen_tscookie_proto = { > + .func = bpf_tcp_raw_gen_tscookie, > + .gpl_only = false, > + .pkt_access = true, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_MEM, > + .arg2_type = ARG_CONST_SIZE, > + .arg3_type = ARG_PTR_TO_UNINIT_MEM, > + .arg4_type = ARG_CONST_SIZE, > +}; > + > #endif /* CONFIG_INET */ > > bool bpf_helper_changes_pkt_data(void *func) > @@ -7825,6 +7861,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > return &bpf_tcp_raw_gen_syncookie_proto; > case BPF_FUNC_tcp_raw_check_syncookie: > return &bpf_tcp_raw_check_syncookie_proto; > + case BPF_FUNC_tcp_raw_gen_tscookie: > + return &bpf_tcp_raw_gen_tscookie_proto; > #endif > default: > return bpf_sk_base_func_proto(func_id); > diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c > index 8696dc343ad2..4dd2c7a096eb 100644 > --- a/net/ipv4/syncookies.c > +++ 
b/net/ipv4/syncookies.c > @@ -85,6 +85,66 @@ u64 cookie_init_timestamp(struct request_sock *req, u64 now) > return (u64)ts * (NSEC_PER_SEC / TCP_TS_HZ); > } > > +bool cookie_init_timestamp_raw(struct tcphdr *th, __be32 *tsval, __be32 *tsecr) > +{ > + int length = (th->doff * 4) - sizeof(*th); > + u8 wscale = TS_OPT_WSCALE_MASK; > + bool option_timestamp = false; > + bool option_sack = false; > + u32 cookie; > + u8 *ptr; > + > + ptr = (u8 *)(th + 1); > + > + while (length > 0) { > + u8 opcode = *ptr++; > + u8 opsize; > + > + if (opcode == TCPOPT_EOL) > + break; > + if (opcode == TCPOPT_NOP) { > + length--; > + continue; > + } > + > + if (length < 2) > + break; > + opsize = *ptr++; > + if (opsize < 2) > + break; > + if (opsize > length) > + break; > + > + switch (opcode) { > + case TCPOPT_WINDOW: You must check opsize. > + wscale = min_t(u8, *ptr, TCP_MAX_WSCALE); > + break; > + case TCPOPT_TIMESTAMP: You must check opsize. > + option_timestamp = true; > + /* Client's tsval becomes our tsecr. 
*/ > + *tsecr = cpu_to_be32(get_unaligned_be32(ptr)); Please avoid the useless ntohl/htonl dance (even if the compiler probably optimizes this). No need to obfuscate :) *tsecr = get_unaligned((__be32 *)ptr); > + break; > + case TCPOPT_SACK_PERM: > + option_sack = true; > + break; > + } > + > + ptr += opsize - 2; > + length -= opsize; > + } > + > + if (!option_timestamp) > + return false; > + > + cookie = tcp_time_stamp_raw() & ~TSMASK; > + cookie |= wscale & TS_OPT_WSCALE_MASK; > + if (option_sack) > + cookie |= TS_OPT_SACK; > + if (th->ece && th->cwr) > + cookie |= TS_OPT_ECN; > + *tsval = cpu_to_be32(cookie); > + return true; > +} > > static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport, > __be16 dport, __u32 sseq, __u32 data) > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index e32f72077250..791790b41874 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -5053,6 +5053,32 @@ union bpf_attr { > * > * **-EPROTONOSUPPORT** if the IP version is not 4 or 6 (or 6, but > * CONFIG_IPV6 is disabled). > + * > + * int bpf_tcp_raw_gen_tscookie(struct tcphdr *th, u32 th_len, __be32 *tsopt, u32 tsopt_len) > + * Description > + * Try to generate a timestamp cookie which encodes some of the > + * flags sent by the client in the SYN packet: SACK support, ECN > + * support, window scale. To be used with SYN cookies. > + * > + * *th* points to the start of the TCP header of the client's SYN > + * packet, while *th_len* contains the length of the TCP header (at > + * least **sizeof**\ (**struct tcphdr**)). > + * > + * *tsopt* points to the output location where to put the resulting > + * timestamp values: tsval and tsecr, in the format of the TCP > + * timestamp option. > + * > + * Return > + * On success, 0. > + * > + * On failure, the returned value is one of the following: > + * > + * **-EINVAL** if the input arguments are invalid. 
> + * > + * **-ENOENT** if the TCP header doesn't have the timestamp option. > + * > + * **-EOPNOTSUPP** if the kernel configuration does not enable SYN > + * cookies (CONFIG_SYN_COOKIES is off). > */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -5238,6 +5264,7 @@ union bpf_attr { > FN(ct_release), \ > FN(tcp_raw_gen_syncookie), \ > FN(tcp_raw_check_syncookie), \ > + FN(tcp_raw_gen_tscookie), \ > /* */ > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper >