The new helpers (bpf_ct_lookup_tcp and bpf_ct_lookup_udp) allow querying connection tracking information of TCP and UDP connections based on source and destination IP address and port. Each helper returns a pointer to struct nf_conn (if the conntrack entry was found), which needs to be released with bpf_ct_release. Signed-off-by: Maxim Mikityanskiy <maximmi@xxxxxxxxxx> Reviewed-by: Tariq Toukan <tariqt@xxxxxxxxxx> --- include/uapi/linux/bpf.h | 81 +++++++++++++ kernel/bpf/verifier.c | 9 +- net/core/filter.c | 205 +++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 81 +++++++++++++ 4 files changed, 373 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a10a44c4f79b..883de3f1bb8b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4925,6 +4925,79 @@ union bpf_attr { * Return * The number of bytes written to the buffer, or a negative error * in case of failure. + * + * struct bpf_nf_conn *bpf_ct_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 *flags_err) + * Description + * Look for conntrack info for a TCP connection matching *tuple*, + * optionally in a child network namespace *netns*. + * + * The *flags_err* argument is used as an input parameter for flags + * and output parameter for the error code. The flags can be a + * combination of one or more of the following values: + * + * **BPF_F_CT_DIR_REPLY** + * When set, the conntrack direction is IP_CT_DIR_REPLY, + * otherwise IP_CT_DIR_ORIGINAL. + * + * If the function returns **NULL**, *flags_err* is set to one of + * the following negative error codes: + * + * **-EAFNOSUPPORT** + * *tuple_size* doesn't match supported address families + * (AF_INET; AF_INET6 when CONFIG_IPV6 is enabled). + * + * **-EINVAL** + * Input arguments are not valid. + * + * **-ENOENT** + * Connection tracking entry for *tuple* wasn't found. 
+ * + * This helper is available only if the kernel was compiled with + * **CONFIG_NF_CONNTRACK** configuration option as built-in. + * Return + * Pointer to **struct bpf_nf_conn** that must be released with + * **bpf_ct_release**\ (), or **NULL** in case of failure or if + * there is no conntrack entry for this tuple. + * + * struct bpf_nf_conn *bpf_ct_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 *flags_err) + * Description + * Look for conntrack info for a UDP connection matching *tuple*, + * optionally in a child network namespace *netns*. + * + * The *flags_err* argument is used as an input parameter for flags + * and output parameter for the error code. The flags can be a + * combination of one or more of the following values: + * + * **BPF_F_CT_DIR_REPLY** + * When set, the conntrack direction is IP_CT_DIR_REPLY, + * otherwise IP_CT_DIR_ORIGINAL. + * + * If the function returns **NULL**, *flags_err* is set to one of + * the following negative error codes: + * + * **-EAFNOSUPPORT** + * *tuple_size* doesn't match supported address families + * (AF_INET; AF_INET6 when CONFIG_IPV6 is enabled). + * + * **-EINVAL** + * Input arguments are not valid. + * + * **-ENOENT** + * Connection tracking entry for *tuple* wasn't found. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NF_CONNTRACK** configuration option as built-in. + * Return + * Pointer to **struct bpf_nf_conn** that must be released with + * **bpf_ct_release**\ (), or **NULL** in case of failure or if + * there is no conntrack entry for this tuple. + * + * long bpf_ct_release(void *ct) + * Description + * Release the reference held by *ct*. *ct* must be a non-**NULL** + * pointer that was returned from **bpf_ct_lookup_xxx**\ (). + * Return + * 0 on success, or a negative error in case of failure. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5105,6 +5178,9 @@ union bpf_attr { FN(task_pt_regs), \ FN(get_branch_snapshot), \ FN(trace_vprintk), \ + FN(ct_lookup_tcp), \ + FN(ct_lookup_udp), \ + FN(ct_release), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5288,6 +5364,11 @@ enum { BPF_F_EXCLUDE_INGRESS = (1ULL << 4), }; +/* Flags for bpf_ct_lookup_{tcp,udp} helpers. */ +enum { + BPF_F_CT_DIR_REPLY = (1ULL << 0), +}; + #define __bpf_md_ptr(type, name) \ union { \ type name; \ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6eafef35e247..23e2a23ca9c4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -506,7 +506,8 @@ static bool is_release_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_sk_release || func_id == BPF_FUNC_ringbuf_submit || - func_id == BPF_FUNC_ringbuf_discard; + func_id == BPF_FUNC_ringbuf_discard || + func_id == BPF_FUNC_ct_release; } static bool may_be_acquire_function(enum bpf_func_id func_id) @@ -515,7 +516,9 @@ static bool may_be_acquire_function(enum bpf_func_id func_id) func_id == BPF_FUNC_sk_lookup_udp || func_id == BPF_FUNC_skc_lookup_tcp || func_id == BPF_FUNC_map_lookup_elem || - func_id == BPF_FUNC_ringbuf_reserve; + func_id == BPF_FUNC_ringbuf_reserve || + func_id == BPF_FUNC_ct_lookup_tcp || + func_id == BPF_FUNC_ct_lookup_udp; /* UDP lookup also returns a reference */ } static bool is_acquire_function(enum bpf_func_id func_id, @@ -526,7 +529,9 @@ static bool is_acquire_function(enum bpf_func_id func_id, if (func_id == BPF_FUNC_sk_lookup_tcp || func_id == BPF_FUNC_sk_lookup_udp || func_id == BPF_FUNC_skc_lookup_tcp || - func_id == BPF_FUNC_ringbuf_reserve) + func_id == BPF_FUNC_ringbuf_reserve || + func_id == BPF_FUNC_ct_lookup_tcp || + func_id == BPF_FUNC_ct_lookup_udp) /* UDP lookup also returns a reference */ return true; if (func_id == BPF_FUNC_map_lookup_elem && diff --git a/net/core/filter.c b/net/core/filter.c index d2d07ccae599..f913851c97f7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -79,6 +79,7 @@ #include <net/tls.h> #include <net/xdp.h> #include 
<net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -7096,6 +7097,194 @@ static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = { .arg3_type = ARG_ANYTHING, }; +#if IS_BUILTIN(CONFIG_NF_CONNTRACK) +static struct nf_conn *bpf_ct_lookup(struct net *caller_net, + struct bpf_sock_tuple *tuple, + u32 tuple_len, + u8 protonum, + u64 netns_id, + u64 flags) +{ /* Shared by the tcp/udp helpers: returns the conntrack entry or an ERR_PTR() code. */ + struct nf_conntrack_tuple ct_tuple = {}; + struct nf_conntrack_tuple_hash *found; + struct net *net; + u8 direction; + + direction = flags & BPF_F_CT_DIR_REPLY ? IP_CT_DIR_REPLY : + IP_CT_DIR_ORIGINAL; + + if (flags & ~BPF_F_CT_DIR_REPLY) /* any other flag bit is rejected */ + return ERR_PTR(-EINVAL); + + if (tuple_len == sizeof(tuple->ipv4)) { /* tuple_len selects the address family */ + ct_tuple.src.l3num = AF_INET; + ct_tuple.src.u3.ip = tuple->ipv4.saddr; + ct_tuple.src.u.tcp.port = tuple->ipv4.sport; /* NOTE(review): also used for UDP; presumably a port union - confirm */ + ct_tuple.dst.u3.ip = tuple->ipv4.daddr; + ct_tuple.dst.u.tcp.port = tuple->ipv4.dport; +#if IS_ENABLED(CONFIG_IPV6) + } else if (tuple_len == sizeof(tuple->ipv6)) { + ct_tuple.src.l3num = AF_INET6; + memcpy(ct_tuple.src.u3.ip6, tuple->ipv6.saddr, + sizeof(tuple->ipv6.saddr)); + ct_tuple.src.u.tcp.port = tuple->ipv6.sport; + memcpy(ct_tuple.dst.u3.ip6, tuple->ipv6.daddr, + sizeof(tuple->ipv6.daddr)); + ct_tuple.dst.u.tcp.port = tuple->ipv6.dport; +#endif + } else { + return ERR_PTR(-EAFNOSUPPORT); + } + + ct_tuple.dst.protonum = protonum; + ct_tuple.dst.dir = direction; + + net = caller_net; + if ((s32)netns_id >= 0) { /* negative (as s32) id: look up in caller_net */ + if (unlikely(netns_id > S32_MAX)) + return ERR_PTR(-EINVAL); + net = get_net_ns_by_id(net, netns_id); + if (!net) + return ERR_PTR(-EINVAL); + } + + found = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &ct_tuple); + + if ((s32)netns_id >= 0) + put_net(net); /* only when net came from get_net_ns_by_id() */ + + if (!found) + return ERR_PTR(-ENOENT); + return nf_ct_tuplehash_to_ctrack(found); +} + +BPF_CALL_5(bpf_xdp_ct_lookup_tcp, struct xdp_buff *, ctx, + struct bpf_sock_tuple *, tuple, u32, 
tuple_len, + u64, netns_id, u64 *, flags_err) +{ + struct nf_conn *ct; + + ct = bpf_ct_lookup(dev_net(ctx->rxq->dev), tuple, tuple_len, + IPPROTO_TCP, netns_id, *flags_err); /* *flags_err carries the input flags */ + if (IS_ERR(ct)) { + *flags_err = PTR_ERR(ct); /* on failure it is overwritten with the negative errno */ + return (unsigned long)NULL; + } + return (unsigned long)ct; +} + +static const struct bpf_func_proto bpf_xdp_ct_lookup_tcp_proto = { + .func = bpf_xdp_ct_lookup_tcp, + .gpl_only = true, /* nf_conntrack_find_get is GPL */ + .pkt_access = true, + .ret_type = RET_PTR_TO_NF_CONN_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_PTR_TO_LONG, +}; + +BPF_CALL_5(bpf_xdp_ct_lookup_udp, struct xdp_buff *, ctx, + struct bpf_sock_tuple *, tuple, u32, tuple_len, + u64, netns_id, u64 *, flags_err) +{ + struct nf_conn *ct; + + ct = bpf_ct_lookup(dev_net(ctx->rxq->dev), tuple, tuple_len, + IPPROTO_UDP, netns_id, *flags_err); /* *flags_err carries the input flags */ + if (IS_ERR(ct)) { + *flags_err = PTR_ERR(ct); /* on failure it is overwritten with the negative errno */ + return (unsigned long)NULL; + } + return (unsigned long)ct; +} + +static const struct bpf_func_proto bpf_xdp_ct_lookup_udp_proto = { + .func = bpf_xdp_ct_lookup_udp, + .gpl_only = true, /* nf_conntrack_find_get is GPL */ + .pkt_access = true, + .ret_type = RET_PTR_TO_NF_CONN_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_PTR_TO_LONG, +}; + +BPF_CALL_5(bpf_skb_ct_lookup_tcp, struct sk_buff *, skb, + struct bpf_sock_tuple *, tuple, u32, tuple_len, + u64, netns_id, u64 *, flags_err) +{ + struct net *caller_net; + struct nf_conn *ct; + + caller_net = skb->dev ? 
dev_net(skb->dev) : sock_net(skb->sk); /* without a device, use the netns of skb->sk */ + ct = bpf_ct_lookup(caller_net, tuple, tuple_len, IPPROTO_TCP, + netns_id, *flags_err); /* *flags_err carries the input flags */ + if (IS_ERR(ct)) { + *flags_err = PTR_ERR(ct); /* on failure it is overwritten with the negative errno */ + return (unsigned long)NULL; + } + return (unsigned long)ct; +} + +static const struct bpf_func_proto bpf_skb_ct_lookup_tcp_proto = { + .func = bpf_skb_ct_lookup_tcp, + .gpl_only = true, /* nf_conntrack_find_get is GPL */ + .pkt_access = true, + .ret_type = RET_PTR_TO_NF_CONN_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_PTR_TO_LONG, +}; + +BPF_CALL_5(bpf_skb_ct_lookup_udp, struct sk_buff *, skb, + struct bpf_sock_tuple *, tuple, u32, tuple_len, + u64, netns_id, u64 *, flags_err) +{ + struct net *caller_net; + struct nf_conn *ct; + + caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk); /* without a device, use the netns of skb->sk */ + ct = bpf_ct_lookup(caller_net, tuple, tuple_len, IPPROTO_UDP, + netns_id, *flags_err); /* *flags_err carries the input flags */ + if (IS_ERR(ct)) { + *flags_err = PTR_ERR(ct); /* on failure it is overwritten with the negative errno */ + return (unsigned long)NULL; + } + return (unsigned long)ct; +} + +static const struct bpf_func_proto bpf_skb_ct_lookup_udp_proto = { + .func = bpf_skb_ct_lookup_udp, + .gpl_only = true, /* nf_conntrack_find_get is GPL */ + .pkt_access = true, + .ret_type = RET_PTR_TO_NF_CONN_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_PTR_TO_LONG, +}; + +BPF_CALL_1(bpf_ct_release, struct nf_conn *, ct) +{ + nf_ct_put(ct); /* releases the conntrack entry handed out by a ct_lookup helper */ + return 0; +} + +static const struct bpf_func_proto bpf_ct_release_proto = { + .func = bpf_ct_release, + .gpl_only = false, + .pkt_access = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_NF_CONN, +}; +#endif + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -7455,6 +7644,14 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_gen_syncookie_proto; case BPF_FUNC_sk_assign: 
return &bpf_sk_assign_proto; +#if IS_BUILTIN(CONFIG_NF_CONNTRACK) + case BPF_FUNC_ct_lookup_tcp: + return &bpf_skb_ct_lookup_tcp_proto; + case BPF_FUNC_ct_lookup_udp: + return &bpf_skb_ct_lookup_udp_proto; + case BPF_FUNC_ct_release: + return &bpf_ct_release_proto; +#endif #endif default: return bpf_sk_base_func_proto(func_id); @@ -7498,6 +7695,14 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_check_syncookie_proto; case BPF_FUNC_tcp_gen_syncookie: return &bpf_tcp_gen_syncookie_proto; +#if IS_BUILTIN(CONFIG_NF_CONNTRACK) + case BPF_FUNC_ct_lookup_tcp: + return &bpf_xdp_ct_lookup_tcp_proto; + case BPF_FUNC_ct_lookup_udp: + return &bpf_xdp_ct_lookup_udp_proto; + case BPF_FUNC_ct_release: + return &bpf_ct_release_proto; +#endif #endif default: return bpf_sk_base_func_proto(func_id); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a10a44c4f79b..883de3f1bb8b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4925,6 +4925,79 @@ union bpf_attr { * Return * The number of bytes written to the buffer, or a negative error * in case of failure. + * + * struct bpf_nf_conn *bpf_ct_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 *flags_err) + * Description + * Look for conntrack info for a TCP connection matching *tuple*, + * optionally in a child network namespace *netns*. + * + * The *flags_err* argument is used as an input parameter for flags + * and output parameter for the error code. The flags can be a + * combination of one or more of the following values: + * + * **BPF_F_CT_DIR_REPLY** + * When set, the conntrack direction is IP_CT_DIR_REPLY, + * otherwise IP_CT_DIR_ORIGINAL. + * + * If the function returns **NULL**, *flags_err* is set to one of + * the following negative error codes: + * + * **-EAFNOSUPPORT** + * *tuple_size* doesn't match supported address families + * (AF_INET; AF_INET6 when CONFIG_IPV6 is enabled). 
+ * + * **-EINVAL** + * Input arguments are not valid. + * + * **-ENOENT** + * Connection tracking entry for *tuple* wasn't found. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NF_CONNTRACK** configuration option as built-in. + * Return + * Pointer to **struct bpf_nf_conn** that must be released with + * **bpf_ct_release**\ (), or **NULL** in case of failure or if + * there is no conntrack entry for this tuple. + * + * struct bpf_nf_conn *bpf_ct_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 *flags_err) + * Description + * Look for conntrack info for a UDP connection matching *tuple*, + * optionally in a child network namespace *netns*. + * + * The *flags_err* argument is used as an input parameter for flags + * and output parameter for the error code. The flags can be a + * combination of one or more of the following values: + * + * **BPF_F_CT_DIR_REPLY** + * When set, the conntrack direction is IP_CT_DIR_REPLY, + * otherwise IP_CT_DIR_ORIGINAL. + * + * If the function returns **NULL**, *flags_err* is set to one of + * the following negative error codes: + * + * **-EAFNOSUPPORT** + * *tuple_size* doesn't match supported address families + * (AF_INET; AF_INET6 when CONFIG_IPV6 is enabled). + * + * **-EINVAL** + * Input arguments are not valid. + * + * **-ENOENT** + * Connection tracking entry for *tuple* wasn't found. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NF_CONNTRACK** configuration option as built-in. + * Return + * Pointer to **struct bpf_nf_conn** that must be released with + * **bpf_ct_release**\ (), or **NULL** in case of failure or if + * there is no conntrack entry for this tuple. + * + * long bpf_ct_release(void *ct) + * Description + * Release the reference held by *ct*. *ct* must be a non-**NULL** + * pointer that was returned from **bpf_ct_lookup_xxx**\ (). + * Return + * 0 on success, or a negative error in case of failure. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5105,6 +5178,9 @@ union bpf_attr { FN(task_pt_regs), \ FN(get_branch_snapshot), \ FN(trace_vprintk), \ + FN(ct_lookup_tcp), \ + FN(ct_lookup_udp), \ + FN(ct_release), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5288,6 +5364,11 @@ enum { BPF_F_EXCLUDE_INGRESS = (1ULL << 4), }; +/* Flags for bpf_ct_lookup_{tcp,udp} helpers. */ +enum { + BPF_F_CT_DIR_REPLY = (1ULL << 0), +}; + #define __bpf_md_ptr(type, name) \ union { \ type name; \ -- 2.30.2