This patch adds a new SOCK_OPS hook to validate an arbitrary SYN Cookie. When the kernel receives an ACK for a SYN Cookie, the hook is invoked with bpf_sock_ops.op == BPF_SOCK_OPS_CHECK_SYNCOOKIE_CB if the listener has BPF_SOCK_OPS_SYNCOOKIE_CB_FLAG set by bpf_sock_ops_cb_flags_set(). The BPF program can access the following information to validate the ISN: bpf_sock_ops.sk (4-tuple), bpf_sock_ops.skb (TCP header), and bpf_sock_ops.args[0] (ISN). The program must decode the MSS and set it in bpf_sock_ops.replylong[0]. By default, the kernel validates the SYN Cookie before allocating a reqsk, but the hook is invoked after allocating the reqsk to keep the user interface consistent with BPF_SOCK_OPS_GEN_SYNCOOKIE_CB. Signed-off-by: Kuniyuki Iwashima <kuniyu@xxxxxxxxxx> --- include/net/tcp.h | 12 ++++++ include/uapi/linux/bpf.h | 20 +++++++--- net/ipv4/syncookies.c | 73 +++++++++++++++++++++++++++------- net/ipv6/syncookies.c | 44 +++++++++++++------- tools/include/uapi/linux/bpf.h | 20 +++++++--- 5 files changed, 130 insertions(+), 39 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 676618c89bb7..90d95acdc34a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2158,6 +2158,18 @@ static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); return ops->cookie_init_seq(skb, mss); } + +#ifdef CONFIG_CGROUP_BPF +int bpf_skops_cookie_check(struct sock *sk, struct request_sock *req, + struct sk_buff *skb); +#else +static inline int bpf_skops_cookie_check(struct sock *sk, struct request_sock *req, + struct sk_buff *skb) +{ + return 0; +} +#endif + #else static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, const struct sock *sk, struct sk_buff *skb, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d3cc530613c0..e6f1507d7895 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6738,13 +6738,16 @@ enum { * options first before the BPF 
program does. */ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), - /* Call bpf when the kernel generates SYN Cookie (ISN) for SYN+ACK. + /* Call bpf when the kernel generates SYN Cookie (ISN) for SYN+ACK + * and validates ACK for SYN Cookie. * - * The bpf prog will be called to encode MSS into SYN Cookie with - * sock_ops->op == BPF_SOCK_OPS_GEN_SYNCOOKIE_CB. + * The bpf prog will be first called to encode MSS into SYN Cookie + * with sock_ops->op == BPF_SOCK_OPS_GEN_SYNCOOKIE_CB. Then, the + * bpf prog will be called to decode MSS from SYN Cookie with + * sock_ops->op == BPF_SOCK_OPS_CHECK_SYNCOOKIE_CB. * - * Please refer to the comment in BPF_SOCK_OPS_GEN_SYNCOOKIE_CB for - * input and output. + * Please refer to the comment in BPF_SOCK_OPS_GEN_SYNCOOKIE_CB and + * BPF_SOCK_OPS_CHECK_SYNCOOKIE_CB for input and output. */ BPF_SOCK_OPS_SYNCOOKIE_CB_FLAG = (1<<7), /* Mask of all currently supported cb flags */ @@ -6868,6 +6871,13 @@ enum { * * replylong[0]: ISN */ + BPF_SOCK_OPS_CHECK_SYNCOOKIE_CB,/* Validate SYN Cookie and set + * MSS. + * + * args[0]: ISN + * + * replylong[0]: MSS + */ }; /* List of TCP states. 
There is a build check in net/ipv4/tcp.c to detect diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 514f1a4abdee..b1dd415863ff 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -317,6 +317,37 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, } EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc); +#if IS_ENABLED(CONFIG_CGROUP_BPF) && IS_ENABLED(CONFIG_SYN_COOKIES) +int bpf_skops_cookie_check(struct sock *sk, struct request_sock *req, struct sk_buff *skb) +{ + struct bpf_sock_ops_kern sock_ops; + + memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp)); + + sock_ops.op = BPF_SOCK_OPS_CHECK_SYNCOOKIE_CB; + sock_ops.sk = req_to_sk(req); + sock_ops.args[0] = tcp_rsk(req)->snt_isn; + + bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb)); + + if (BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(&sock_ops, sk)) + goto err; + + if (!sock_ops.replylong[0]) + goto err; + + __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + + return sock_ops.replylong[0]; + +err: + __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); + + return 0; +} +EXPORT_SYMBOL_GPL(bpf_skops_cookie_check); +#endif + /* On input, sk is a listener. * Output is listener if incoming packet would not create a child * NULL if memory could not be allocated. 
@@ -336,6 +367,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) int full_space, mss; struct flowi4 fl4; struct rtable *rt; + bool bpf_cookie; __u8 rcv_wscale; u32 tsoff = 0; @@ -343,16 +375,19 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) !th->ack || th->rst) goto out; - if (tcp_synq_no_recent_overflow(sk)) - goto out; + bpf_cookie = BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_SYNCOOKIE_CB_FLAG); + if (!bpf_cookie) { + if (tcp_synq_no_recent_overflow(sk)) + goto out; - mss = __cookie_v4_check(ip_hdr(skb), th, cookie); - if (mss == 0) { - __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); - goto out; - } + mss = __cookie_v4_check(ip_hdr(skb), th, cookie); + if (mss == 0) { + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); + goto out; + } - __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV); + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV); + } /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); @@ -365,7 +400,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) tcp_opt.rcv_tsecr -= tsoff; } - if (!cookie_timestamp_decode(net, &tcp_opt)) + if (!bpf_cookie && !cookie_timestamp_decode(net, &tcp_opt)) goto out; req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, @@ -375,21 +410,31 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); treq = tcp_rsk(req); - treq->rcv_isn = ntohl(th->seq) - 1; - treq->snt_isn = cookie; - treq->ts_off = tsoff; - treq->txhash = net_tx_rndhash(); - req->mss = mss; ireq->ir_num = ntohs(th->dest); ireq->ir_rmt_port = th->source; + treq->snt_isn = cookie; + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); + + if (bpf_cookie) { + mss = bpf_skops_cookie_check(sk, req, skb); + if (!mss) { + reqsk_free(req); + goto out; + } + } + + req->mss = mss; ireq->ir_mark = inet_request_mark(sk, skb); ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; 
ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; + treq->rcv_isn = ntohl(th->seq) - 1; + treq->ts_off = tsoff; + treq->txhash = net_tx_rndhash(); treq->snt_synack = 0; treq->tfo_listener = false; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 60bdc4d9150b..3e920e7eb5d3 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -139,6 +139,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst; struct sock *ret = sk; int full_space, mss; + bool bpf_cookie; __u8 rcv_wscale; u32 tsoff = 0; @@ -146,16 +147,19 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) !th->ack || th->rst) goto out; - if (tcp_synq_no_recent_overflow(sk)) - goto out; + bpf_cookie = BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_SYNCOOKIE_CB_FLAG); + if (!bpf_cookie) { + if (tcp_synq_no_recent_overflow(sk)) + goto out; - mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie); - if (mss == 0) { - __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); - goto out; - } + mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie); + if (mss == 0) { + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); + goto out; + } - __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV); + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV); + } /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); @@ -168,7 +172,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) tcp_opt.rcv_tsecr -= tsoff; } - if (!cookie_timestamp_decode(net, &tcp_opt)) + if (!bpf_cookie && !cookie_timestamp_decode(net, &tcp_opt)) goto out; req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, @@ -177,17 +181,25 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out_drop; ireq = inet_rsk(req); + ireq->ir_rmt_port = th->source; + ireq->ir_num = ntohs(th->dest); + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = 
ipv6_hdr(skb)->daddr; + treq = tcp_rsk(req); - treq->tfo_listener = false; + treq->snt_isn = cookie; + + if (bpf_cookie) { + mss = bpf_skops_cookie_check(sk, req, skb); + if (!mss) { + reqsk_free(req); + goto out; + } + } if (security_inet_conn_request(sk, skb, req)) goto out_free; - req->mss = mss; - ireq->ir_rmt_port = th->source; - ireq->ir_num = ntohs(th->dest); - ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; - ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { @@ -203,6 +215,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->ir_mark = inet_request_mark(sk, skb); + req->mss = mss; req->num_retrans = 0; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; @@ -210,6 +223,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->snt_synack = 0; + treq->tfo_listener = false; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; treq->ts_off = tsoff; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d3cc530613c0..e6f1507d7895 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6738,13 +6738,16 @@ enum { * options first before the BPF program does. */ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), - /* Call bpf when the kernel generates SYN Cookie (ISN) for SYN+ACK. + /* Call bpf when the kernel generates SYN Cookie (ISN) for SYN+ACK + * and validates ACK for SYN Cookie. * - * The bpf prog will be called to encode MSS into SYN Cookie with - * sock_ops->op == BPF_SOCK_OPS_GEN_SYNCOOKIE_CB. + * The bpf prog will be first called to encode MSS into SYN Cookie + * with sock_ops->op == BPF_SOCK_OPS_GEN_SYNCOOKIE_CB. 
Then, the + * bpf prog will be called to decode MSS from SYN Cookie with + * sock_ops->op == BPF_SOCK_OPS_CHECK_SYNCOOKIE_CB. * - * Please refer to the comment in BPF_SOCK_OPS_GEN_SYNCOOKIE_CB for - * input and output. + * Please refer to the comment in BPF_SOCK_OPS_GEN_SYNCOOKIE_CB and + * BPF_SOCK_OPS_CHECK_SYNCOOKIE_CB for input and output. */ BPF_SOCK_OPS_SYNCOOKIE_CB_FLAG = (1<<7), /* Mask of all currently supported cb flags */ @@ -6868,6 +6871,13 @@ enum { * * replylong[0]: ISN */ + BPF_SOCK_OPS_CHECK_SYNCOOKIE_CB,/* Validate SYN Cookie and set + * MSS. + * + * args[0]: ISN + * + * replylong[0]: MSS + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect -- 2.30.2