Introduce bpf_xdp_ct_add and bpf_skb_ct_add kfunc helpers in order to add a new entry to ct map from an ebpf program. Introduce bpf_nf_ct_tuple_parse utility routine. Signed-off-by: Lorenzo Bianconi <lorenzo@xxxxxxxxxx> --- net/netfilter/nf_conntrack_bpf.c | 212 +++++++++++++++++++++++++++---- 1 file changed, 189 insertions(+), 23 deletions(-) diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index a9271418db88..3d31b602fdf1 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -55,41 +55,114 @@ enum { NF_BPF_CT_OPTS_SZ = 12, }; -static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, - struct bpf_sock_tuple *bpf_tuple, - u32 tuple_len, u8 protonum, - s32 netns_id, u8 *dir) +static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple, + u32 tuple_len, u8 protonum, u8 dir, + struct nf_conntrack_tuple *tuple) { - struct nf_conntrack_tuple_hash *hash; - struct nf_conntrack_tuple tuple; - struct nf_conn *ct; + union nf_inet_addr *src = dir ? &tuple->dst.u3 : &tuple->src.u3; + union nf_inet_addr *dst = dir ? &tuple->src.u3 : &tuple->dst.u3; + union nf_conntrack_man_proto *sport = dir ? (void *)&tuple->dst.u + : &tuple->src.u; + union nf_conntrack_man_proto *dport = dir ? &tuple->src.u + : (void *)&tuple->dst.u; if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP)) - return ERR_PTR(-EPROTO); - if (unlikely(netns_id < BPF_F_CURRENT_NETNS)) - return ERR_PTR(-EINVAL); + return -EPROTO; + + memset(tuple, 0, sizeof(*tuple)); - memset(&tuple, 0, sizeof(tuple)); switch (tuple_len) { case sizeof(bpf_tuple->ipv4): - tuple.src.l3num = AF_INET; - tuple.src.u3.ip = bpf_tuple->ipv4.saddr; - tuple.src.u.tcp.port = bpf_tuple->ipv4.sport; - tuple.dst.u3.ip = bpf_tuple->ipv4.daddr; - tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport; + tuple->src.l3num = AF_INET; + src->ip = bpf_tuple->ipv4.saddr; + sport->tcp.port = bpf_tuple->ipv4.sport; + dst->ip = bpf_tuple->ipv4.daddr; + dport->tcp.port = bpf_tuple->ipv4.dport; break; case sizeof(bpf_tuple->ipv6): - tuple.src.l3num = AF_INET6; - memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr)); - tuple.src.u.tcp.port = bpf_tuple->ipv6.sport; - memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr)); - tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport; + tuple->src.l3num = AF_INET6; + memcpy(src->ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr)); + sport->tcp.port = bpf_tuple->ipv6.sport; + memcpy(dst->ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr)); + dport->tcp.port = bpf_tuple->ipv6.dport; break; default: - return ERR_PTR(-EAFNOSUPPORT); + return -EAFNOSUPPORT; } + tuple->dst.protonum = protonum; + tuple->dst.dir = dir; + + return 0; +} - tuple.dst.protonum = protonum; +struct nf_conn * +__bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple, + u32 tuple_len, u8 protonum, s32 netns_id, u32 timeout) +{ + struct nf_conntrack_tuple otuple, rtuple; + struct nf_conn *ct; + int err; + + if (unlikely(netns_id < BPF_F_CURRENT_NETNS)) + return ERR_PTR(-EINVAL); + + err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, protonum, + IP_CT_DIR_ORIGINAL, &otuple); + if (err < 0) + return ERR_PTR(err); + + err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, protonum, + IP_CT_DIR_REPLY, &rtuple); + if (err < 0) + return ERR_PTR(err); + + if (netns_id >= 0) { + net = get_net_ns_by_id(net, netns_id); + if (unlikely(!net)) + return ERR_PTR(-ENONET); + } + + ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple, + GFP_ATOMIC); + if (IS_ERR(ct)) + goto out; + + ct->timeout = timeout * HZ + jiffies; + ct->status |= IPS_CONFIRMED; + + memset(&ct->proto, 0, sizeof(ct->proto)); + if (protonum == IPPROTO_TCP) + ct->proto.tcp.state = TCP_CONNTRACK_ESTABLISHED; + + err = nf_conntrack_hash_check_insert(ct); + if (err < 0) { + nf_conntrack_free(ct); + ct = ERR_PTR(err); + } +out: + if (netns_id >= 0) + put_net(net); + + return ct; +} + +static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, + struct bpf_sock_tuple *bpf_tuple, + u32 tuple_len, u8 protonum, + s32 netns_id, u8 *dir) +{ + struct nf_conntrack_tuple_hash *hash; + struct nf_conntrack_tuple tuple; + struct nf_conn *ct; + int err; + + if (unlikely(netns_id < BPF_F_CURRENT_NETNS)) + return ERR_PTR(-EINVAL); + + err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, protonum, + IP_CT_DIR_ORIGINAL, &tuple); + if (err < 0) + return ERR_PTR(err); if (netns_id >= 0) { net = get_net_ns_by_id(net, netns_id); @@ -114,6 +187,50 @@ __diag_push(); __diag_ignore_all("-Wmissing-prototypes", "Global functions as their definitions will be in nf_conntrack BTF"); +/* bpf_xdp_ct_add - Add a new CT entry for the given tuple and acquire a + * reference to it + * + * Parameters: + * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program + * Cannot be NULL + * @bpf_tuple - Pointer to memory representing the tuple to look up + * Cannot be NULL + * @tuple__sz - Length of the tuple structure + * Must be one of sizeof(bpf_tuple->ipv4) or + * sizeof(bpf_tuple->ipv6) + * @opts - Additional options for lookup (documented above) + * Cannot be NULL + * @opts__sz - Length of the bpf_ct_opts structure + * Must be NF_BPF_CT_OPTS_SZ (12) + */ +struct nf_conn * +bpf_xdp_ct_add(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, + u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) +{ + struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx; + struct nf_conn *nfct; + + BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ); + if (!opts) + return NULL; + + if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || + opts__sz != NF_BPF_CT_OPTS_SZ) { + opts->error = -EINVAL; + return NULL; + } + + nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, + tuple__sz, opts->l4proto, + opts->netns_id, 10); + if (IS_ERR_OR_NULL(nfct)) { + opts->error = PTR_ERR(nfct); + return NULL; + } + + return nfct; +} + /* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a * reference to it * @@ -157,6 +274,51 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, return nfct; } +/* bpf_skb_ct_add - Add a new CT entry for the given tuple and acquire a + * reference to it + * + * Parameters: + * @skb_ctx - Pointer to ctx (__sk_buff) in TC program + * Cannot be NULL + * @bpf_tuple - Pointer to memory representing the tuple to look up + * Cannot be NULL + * @tuple__sz - Length of the tuple structure + * Must be one of sizeof(bpf_tuple->ipv4) or + * sizeof(bpf_tuple->ipv6) + * @opts - Additional options for lookup (documented above) + * Cannot be NULL + * @opts__sz - Length of the bpf_ct_opts structure + * Must be NF_BPF_CT_OPTS_SZ (12) + */ +struct nf_conn * +bpf_skb_ct_add(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, + u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct nf_conn *nfct; + struct net *net; + + BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ); + if (!opts) + return NULL; + + if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || + opts__sz != NF_BPF_CT_OPTS_SZ) { + opts->error = -EINVAL; + return NULL; + } + + net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk); + nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, + opts->l4proto, opts->netns_id, 10); + if (IS_ERR_OR_NULL(nfct)) { + opts->error = PTR_ERR(nfct); + return NULL; + } + + return nfct; +} + /* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a * reference to it * @@ -238,19 +400,23 @@ void bpf_ct_refresh_timeout(struct nf_conn *nfct__ref, u32 timeout) __diag_pop() BTF_SET_START(nf_ct_xdp_check_kfunc_ids) +BTF_ID(func, bpf_xdp_ct_add) BTF_ID(func, bpf_xdp_ct_lookup) BTF_ID(func, bpf_ct_release) BTF_ID(func, bpf_ct_refresh_timeout); BTF_SET_END(nf_ct_xdp_check_kfunc_ids) BTF_SET_START(nf_ct_tc_check_kfunc_ids) +BTF_ID(func, bpf_skb_ct_add) BTF_ID(func, bpf_skb_ct_lookup) BTF_ID(func, bpf_ct_release) BTF_ID(func, bpf_ct_refresh_timeout); BTF_SET_END(nf_ct_tc_check_kfunc_ids) BTF_SET_START(nf_ct_acquire_kfunc_ids) +BTF_ID(func, bpf_xdp_ct_add) BTF_ID(func, bpf_xdp_ct_lookup) +BTF_ID(func, bpf_skb_ct_add) BTF_ID(func, bpf_skb_ct_lookup) BTF_SET_END(nf_ct_acquire_kfunc_ids) -- 2.35.3