Add a flag that, if set, triggers a call to the eBPF program for each packet holding an IPv6 extension header. Also add a sock_ops operator that identifies such a call. This change reuses skb_data and skb_data_end, which were introduced for TCP option parsing, but here these pointers cover the IPv6 header and its extension headers. For instance, this change allows an eBPF sock_ops program to read complex Segment Routing Headers carrying complex messages in TLVs, or to observe their intermediate segments as soon as they are received. Signed-off-by: Mathieu Jadin <mathjadin@xxxxxxxxx> --- include/uapi/linux/bpf.h | 26 +++++++++++++++++++++++++- net/ipv6/tcp_ipv6.c | 26 ++++++++++++++++++++++++-- tools/include/uapi/linux/bpf.h | 26 +++++++++++++++++++++++++- 3 files changed, 74 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c26871263f1f..34e48f5727a4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5849,6 +5849,10 @@ struct bpf_sock_ops { * the 3WHS. * * bpf_load_hdr_opt() can also be used to read a particular option. + * + * Under sock_ops->op == BPF_SOCK_OPS_PARSE_IP6_HDR_CB, + * [skb_data, skb_data_end] covers the whole IPv6 header + * with its extension headers. */ __bpf_md_ptr(void *, skb_data); __bpf_md_ptr(void *, skb_data_end); @@ -5917,8 +5921,15 @@ enum { * options first before the BPF program does. */ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), + /* Call bpf for all received IPv6 extension headers. The bpf prog will + * be called under sock_ops->op == BPF_SOCK_OPS_PARSE_IPV6_HDR_CB and + * will be able to parse the IPv6 header and its extension headers. + * + * The bpf prog will usually turn this off in the common cases. + */ + BPF_SOCK_OPS_PARSE_IPV6_HDR_CB_FLAG = (1<<7), /* Mask of all currently supported cb flags */ - BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xFF, }; /* List of known BPF sock_ops operators.
@@ -6031,6 +6042,19 @@ enum { * by the kernel or the * earlier bpf-progs. */ + BPF_SOCK_OPS_PARSE_IPV6_HDR_CB, /* Parse the IPv6 extension + * header option. + * It will be called to handle + * the packets received at + * an already established + * connection with an extension + * header. + * + * sock_ops->skb_data: + * Referring to the received skb. + * It covers the IPv6 header and + * its extension headers only. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 551fce49841d..6b47c973f776 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1470,7 +1470,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) { struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct sk_buff *opt_skb = NULL; - struct tcp_sock *tp; + struct tcp_sock *tp = tcp_sk(sk); /* Imagine: socket is IPv6. IPv4 packet arrives, goes to IPv4 receive handler and backlogged. @@ -1518,6 +1518,29 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } } + /* Call ebpf on packets with extension headers */ + if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_PARSE_IPV6_HDR_CB_FLAG) && + ipv6_hdr(skb)->nexthdr != IPPROTO_TCP) { + struct bpf_sock_ops_kern sock_ops; + void *old_data_ptr; + + memset(&sock_ops, 0, + offsetof(struct bpf_sock_ops_kern, temp)); + if (sk_fullsock(sk)) { + sock_ops.is_fullsock = 1; + sock_owned_by_me(sk); + } + sock_ops.op = BPF_SOCK_OPS_PARSE_IPV6_HDR_CB; + sock_ops.sk = sk; + sock_ops.skb = skb; + /* Temporarily use the network header as skb data */ + sock_ops.skb_data_end = skb_transport_header(skb); + old_data_ptr = skb->data; + skb->data = skb_network_header(skb); + BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops); + skb->data = old_data_ptr; + } + tcp_rcv_established(sk, skb); if (opt_skb) goto ipv6_pktoptions; @@ -1571,7 +1594,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) 3. socket is not in passive state. 4.
Finally, it really contains options, which user wants to receive. */ - tp = tcp_sk(sk); if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c26871263f1f..34e48f5727a4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5849,6 +5849,10 @@ struct bpf_sock_ops { * the 3WHS. * * bpf_load_hdr_opt() can also be used to read a particular option. + * + * Under sock_ops->op == BPF_SOCK_OPS_PARSE_IP6_HDR_CB, + * [skb_data, skb_data_end] covers the whole IPv6 header + * with its extension headers. */ __bpf_md_ptr(void *, skb_data); __bpf_md_ptr(void *, skb_data_end); @@ -5917,8 +5921,15 @@ enum { * options first before the BPF program does. */ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), + /* Call bpf for all received IPv6 extension headers. The bpf prog will + * be called under sock_ops->op == BPF_SOCK_OPS_PARSE_IPV6_HDR_CB and + * will be able to parse the IPv6 header and its extension headers. + * + * The bpf prog will usually turn this off in the common cases. + */ + BPF_SOCK_OPS_PARSE_IPV6_HDR_CB_FLAG = (1<<7), /* Mask of all currently supported cb flags */ - BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xFF, }; /* List of known BPF sock_ops operators. @@ -6031,6 +6042,19 @@ enum { * by the kernel or the * earlier bpf-progs. */ + BPF_SOCK_OPS_PARSE_IPV6_HDR_CB, /* Parse the IPv6 extension + * header option. + * It will be called to handle + * the packets received at + * an already established + * connection with an extension + * header. + * + * sock_ops->skb_data: + * Referring to the received skb. + * It covers the IPv6 header and + * its extension headers only. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect -- 2.32.0