In a later patch, the bpf prog only wants to be called to handle a header option if that particular header option cannot be handled by the kernel. This unknown option could be written by the peer's bpf-prog. It could also be a new standard option that the running kernel does not support but that a bpf-prog can handle. In a later patch, the bpf prog will be called from tcp_validate_incoming() if there is an unknown option and a flag is set in tp->bpf_sock_ops_cb_flags. Instead of using skb->cb[] as in an earlier attempt, this patch adds an optional arg "bool *unknown_opt" to tcp_parse_options(). The bool will be set to true if tcp_parse_options() has encountered an option that the kernel does not recognize. Signed-off-by: Martin KaFai Lau <kafai@xxxxxx> --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- include/net/tcp.h | 3 ++- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_input.c | 40 +++++++++++++++++++++----------- net/ipv4/tcp_minisocks.c | 4 ++-- net/ipv6/syncookies.c | 2 +- 6 files changed, 34 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 30e08bcc9afb..dedca6576bb9 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3949,7 +3949,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) */ memset(&tmp_opt, 0, sizeof(tmp_opt)); tcp_clear_options(&tmp_opt); - tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL); + tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL, NULL); req = __skb_push(skb, sizeof(*req)); memset(req, 0, sizeof(*req)); diff --git a/include/net/tcp.h b/include/net/tcp.h index 895e7aabf136..d49d8f1c961a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -413,7 +413,8 @@ int tcp_mmap(struct file *file, struct socket *sock, #endif void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, - int estab, struct tcp_fastopen_cookie *foc); + int estab, struct tcp_fastopen_cookie *foc, + 
bool *unknown_opt); const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); /* diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 9a4f6b16c9bc..fd39aed4fcd3 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -313,7 +313,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); + tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { tsoff = secure_tcp_ts_off(sock_net(sk), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6c38ca9de17e..d9c878001be2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3799,7 +3799,7 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie, foc->exp = exp_opt; } -static void smc_parse_options(const struct tcphdr *th, +static bool smc_parse_options(const struct tcphdr *th, struct tcp_options_received *opt_rx, const unsigned char *ptr, int opsize) @@ -3808,10 +3808,13 @@ static void smc_parse_options(const struct tcphdr *th, if (static_branch_unlikely(&tcp_have_smc)) { if (th->syn && !(opsize & 1) && opsize >= TCPOLEN_EXP_SMC_BASE && - get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) + get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) { opt_rx->smc_ok = 1; + return true; + } } #endif + return false; } /* Try to parse the MSS option from the TCP header. 
Return 0 on failure, clamped @@ -3864,7 +3867,8 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, - struct tcp_fastopen_cookie *foc) + struct tcp_fastopen_cookie *foc, + bool *unknown_opt) { const unsigned char *ptr; const struct tcphdr *th = tcp_hdr(skb); @@ -3962,15 +3966,23 @@ void tcp_parse_options(const struct net *net, */ if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE && get_unaligned_be16(ptr) == - TCPOPT_FASTOPEN_MAGIC) + TCPOPT_FASTOPEN_MAGIC) { tcp_parse_fastopen_option(opsize - TCPOLEN_EXP_FASTOPEN_BASE, ptr + 2, th->syn, foc, true); - else - smc_parse_options(th, opt_rx, ptr, - opsize); + break; + } + + if (smc_parse_options(th, opt_rx, ptr, opsize)) + break; + + if (unknown_opt) + *unknown_opt = true; break; + default: + if (unknown_opt) + *unknown_opt = true; } ptr += opsize-2; length -= opsize; @@ -4003,7 +4015,8 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr */ static bool tcp_fast_parse_options(const struct net *net, const struct sk_buff *skb, - const struct tcphdr *th, struct tcp_sock *tp) + const struct tcphdr *th, struct tcp_sock *tp, + bool *unknown_opt) { /* In the spirit of fast parsing, compare doff directly to constant * values. Because equality is used, short doff can be ignored here. @@ -4017,7 +4030,7 @@ static bool tcp_fast_parse_options(const struct net *net, return true; } - tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL); + tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL, unknown_opt); if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5492,9 +5505,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); bool rst_seq_match = false; + bool unknown_opt = false; /* RFC1323: H1. Apply PAWS check first. 
*/ - if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) && + if (tcp_fast_parse_options(sock_net(sk), skb, th, tp, &unknown_opt) && tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { @@ -5866,7 +5880,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, /* Get original SYNACK MSS value if user MSS sets mss_clamp */ tcp_clear_options(&opt); opt.user_mss = opt.mss_clamp = 0; - tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL); + tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL, NULL); mss = opt.mss_clamp; } @@ -5951,7 +5965,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, int saved_clamp = tp->rx_opt.mss_clamp; bool fastopen_fail; - tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc); + tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc, NULL); if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -6685,7 +6699,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tmp_opt.mss_clamp = af_ops->mss_clamp; tmp_opt.user_mss = tp->rx_opt.user_mss; tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, - want_cookie ? NULL : &foc); + want_cookie ? 
NULL : &foc, NULL); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 495dda2449fe..61f9194802c4 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -98,7 +98,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL); + tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL, NULL); if (tmp_opt.saw_tstamp) { if (tmp_opt.rcv_tsecr) @@ -580,7 +580,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL); + tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 13235a012388..f22961a73c2b 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -157,7 +157,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); + tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { tsoff = secure_tcpv6_ts_off(sock_net(sk), -- 2.24.1