This commit adds struct nf_conn as a new type to BPF. For now, only the status field is mapped. It will allow to add helpers that will expose conntrack information to BPF programs. Signed-off-by: Maxim Mikityanskiy <maximmi@xxxxxxxxxx> Reviewed-by: Tariq Toukan <tariqt@xxxxxxxxxx> --- include/linux/bpf.h | 46 ++++++++++++++++ include/uapi/linux/bpf.h | 4 ++ kernel/bpf/verifier.c | 95 ++++++++++++++++++++++++++++++++-- net/core/filter.c | 41 +++++++++++++++ scripts/bpf_doc.py | 1 + tools/include/uapi/linux/bpf.h | 4 ++ 6 files changed, 186 insertions(+), 5 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d604c8251d88..21ca6e1f0f7a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -341,6 +341,7 @@ enum bpf_arg_type { ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ + ARG_PTR_TO_NF_CONN, /* pointer to nf_conn */ __BPF_ARG_TYPE_MAX, }; @@ -358,6 +359,7 @@ enum bpf_return_type { RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ + RET_PTR_TO_NF_CONN_OR_NULL, /* returns a pointer to a nf_conn or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -459,6 +461,8 @@ enum bpf_reg_type { PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ PTR_TO_FUNC, /* reg points to a bpf program function */ PTR_TO_MAP_KEY, /* reg points to a map element key */ + PTR_TO_NF_CONN, /* reg points to struct nf_conn */ + PTR_TO_NF_CONN_OR_NULL, /* reg points to struct nf_conn or NULL */ __BPF_REG_TYPE_MAX, }; @@ -2127,6 +2131,32 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size); + +#if IS_BUILTIN(CONFIG_NF_CONNTRACK) +bool bpf_ct_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info); +u32 bpf_ct_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size); +#else +static inline bool bpf_ct_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + return false; +} + +static inline u32 bpf_ct_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) +{ + return 0; +} +#endif + #else static inline bool bpf_sock_common_is_valid_access(int off, int size, enum bpf_access_type type, @@ -2148,6 +2178,22 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, { return 0; } + +static inline bool bpf_ct_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + return false; +} + +static inline u32 bpf_ct_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) +{ + return 0; +} #endif #ifdef CONFIG_INET diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index efb2750f39c6..a10a44c4f79b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5472,6 +5472,10 @@ struct bpf_xdp_sock { __u32 queue_id; }; +struct bpf_nf_conn { + __u64 status; +}; + #define XDP_PACKET_HEADROOM 256 /* User return codes for XDP prog type. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 20900a1bac12..6eafef35e247 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -436,13 +436,19 @@ static bool type_is_sk_pointer(enum bpf_reg_type type) type == PTR_TO_XDP_SOCK; } +static bool type_is_ct_pointer(enum bpf_reg_type type) +{ + return type == PTR_TO_NF_CONN; +} + static bool reg_type_not_null(enum bpf_reg_type type) { return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK || type == PTR_TO_MAP_VALUE || type == PTR_TO_MAP_KEY || - type == PTR_TO_SOCK_COMMON; + type == PTR_TO_SOCK_COMMON || + type == PTR_TO_NF_CONN; } static bool reg_type_may_be_null(enum bpf_reg_type type) @@ -454,7 +460,8 @@ static bool reg_type_may_be_null(enum bpf_reg_type type) type == PTR_TO_BTF_ID_OR_NULL || type == PTR_TO_MEM_OR_NULL || type == PTR_TO_RDONLY_BUF_OR_NULL || - type == PTR_TO_RDWR_BUF_OR_NULL; + type == PTR_TO_RDWR_BUF_OR_NULL || + type == PTR_TO_NF_CONN_OR_NULL; } static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) @@ -470,12 +477,15 @@ static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) type == PTR_TO_TCP_SOCK || type == PTR_TO_TCP_SOCK_OR_NULL || type == PTR_TO_MEM || - type == PTR_TO_MEM_OR_NULL; + type == PTR_TO_MEM_OR_NULL || + type == PTR_TO_NF_CONN || + type == PTR_TO_NF_CONN_OR_NULL; } static bool arg_type_may_be_refcounted(enum bpf_arg_type type) { - return type == ARG_PTR_TO_SOCK_COMMON; + return type == ARG_PTR_TO_SOCK_COMMON || + type == ARG_PTR_TO_NF_CONN; } static bool arg_type_may_be_null(enum bpf_arg_type type) @@ -577,6 +587,8 @@ static const char * const reg_type_str[] = { [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", [PTR_TO_FUNC] = "func", [PTR_TO_MAP_KEY] = "map_key", + [PTR_TO_NF_CONN] = "nf_conn", + [PTR_TO_NF_CONN_OR_NULL] = "nf_conn_or_null", }; static char slot_type_char[] = { @@ -1189,6 +1201,9 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) case PTR_TO_RDWR_BUF_OR_NULL: reg->type = PTR_TO_RDWR_BUF; break; + case PTR_TO_NF_CONN_OR_NULL: + reg->type = PTR_TO_NF_CONN; + break; default: WARN_ONCE(1, "unknown nullable register type"); } @@ -2748,6 +2763,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_MEM_OR_NULL: case PTR_TO_FUNC: case PTR_TO_MAP_KEY: + case PTR_TO_NF_CONN: + case PTR_TO_NF_CONN_OR_NULL: return true; default: return false; @@ -3665,6 +3682,40 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, return -EACCES; } +static int check_ct_access(struct bpf_verifier_env *env, int insn_idx, + u32 regno, int off, int size, enum bpf_access_type t) +{ + struct bpf_reg_state *regs = cur_regs(env); + struct bpf_reg_state *reg = ®s[regno]; + struct bpf_insn_access_aux info = {}; + bool valid; + + if (reg->smin_value < 0) { + verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", + regno); + return -EACCES; + } + + switch (reg->type) { + case PTR_TO_NF_CONN: + valid = bpf_ct_is_valid_access(off, size, t, &info); + break; + default: + valid = false; + } + + if (valid) { + env->insn_aux_data[insn_idx].ctx_field_size = + info.ctx_field_size; + return 0; + } + + verbose(env, "R%d invalid %s access off=%d size=%d\n", + regno, reg_type_str[reg->type], off, size); + + return -EACCES; +} + static bool is_pointer_value(struct bpf_verifier_env *env, int regno) { return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno)); @@ -3684,6 +3735,13 @@ static bool is_sk_reg(struct bpf_verifier_env *env, int regno) return type_is_sk_pointer(reg->type); } +static bool is_ct_reg(struct bpf_verifier_env *env, int regno) +{ + const struct bpf_reg_state *reg = reg_state(env, regno); + + return type_is_ct_pointer(reg->type); +} + static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) { const struct bpf_reg_state *reg = reg_state(env, regno); @@ -3804,6 +3862,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, case PTR_TO_XDP_SOCK: pointer_desc = "xdp_sock "; break; + case PTR_TO_NF_CONN: + pointer_desc = "nf_conn "; + break; default: break; } @@ -4478,6 +4539,15 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_sock_access(env, insn_idx, regno, off, size, t); if (!err && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); + } else if (type_is_ct_pointer(reg->type)) { + if (t == BPF_WRITE) { + verbose(env, "R%d cannot write into %s\n", + regno, reg_type_str[reg->type]); + return -EACCES; + } + err = check_ct_access(env, insn_idx, regno, off, size, t); + if (!err && value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); } else if (reg->type == PTR_TO_TP_BUFFER) { err = check_tp_buffer_access(env, reg, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) @@ -4571,7 +4641,8 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i if (is_ctx_reg(env, insn->dst_reg) || is_pkt_reg(env, insn->dst_reg) || is_flow_key_reg(env, insn->dst_reg) || - is_sk_reg(env, insn->dst_reg)) { + is_sk_reg(env, insn->dst_reg) || + is_ct_reg(env, insn->dst_reg)) { verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", insn->dst_reg, reg_type_str[reg_state(env, insn->dst_reg)->type]); @@ -5086,6 +5157,7 @@ static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } }; +static const struct bpf_reg_types nf_conn_types = { .types = { PTR_TO_NF_CONN } }; static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, @@ -5118,6 +5190,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types, [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_TIMER] = &timer_types, + [ARG_PTR_TO_NF_CONN] = &nf_conn_types, }; static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@ -6586,6 +6659,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn */ regs[BPF_REG_0].btf = btf_vmlinux; regs[BPF_REG_0].btf_id = ret_btf_id; + } else if (fn->ret_type == RET_PTR_TO_NF_CONN_OR_NULL) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_NF_CONN_OR_NULL; } else { verbose(env, "unknown return type %d of func %s#%d\n", fn->ret_type, func_id_name(func_id), func_id); @@ -7214,6 +7290,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case PTR_TO_TCP_SOCK: case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: + case PTR_TO_NF_CONN: + case PTR_TO_NF_CONN_OR_NULL: verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; @@ -10505,6 +10583,8 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, case PTR_TO_TCP_SOCK: case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: + case PTR_TO_NF_CONN: + case PTR_TO_NF_CONN_OR_NULL: /* Only valid matches are exact, which memcmp() above * would have accepted */ @@ -11040,6 +11120,8 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type) case PTR_TO_XDP_SOCK: case PTR_TO_BTF_ID: case PTR_TO_BTF_ID_OR_NULL: + case PTR_TO_NF_CONN: + case PTR_TO_NF_CONN_OR_NULL: return false; default: return true; @@ -12462,6 +12544,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) return -EINVAL; } continue; + case PTR_TO_NF_CONN: + convert_ctx_access = bpf_ct_convert_ctx_access; + break; default: continue; } diff --git a/net/core/filter.c b/net/core/filter.c index d04988e67640..d2d07ccae599 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -78,6 +78,7 @@ #include <linux/btf_ids.h> #include <net/tls.h> #include <net/xdp.h> +#include <net/netfilter/nf_conntrack.h> static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -8002,6 +8003,24 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, return size == size_default; } +#if IS_BUILTIN(CONFIG_NF_CONNTRACK) +bool bpf_ct_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (off < 0 || off > sizeof(struct bpf_nf_conn)) + return false; + if (off % size != 0) + return false; + + switch (off) { + case offsetof(struct bpf_nf_conn, status): + return size == sizeof_field(struct bpf_nf_conn, status); + } + + return false; +} +#endif + static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, @@ -9094,6 +9113,28 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } +#if IS_BUILTIN(CONFIG_NF_CONNTRACK) +u32 bpf_ct_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + + switch (si->off) { + case offsetof(struct bpf_nf_conn, status): + BUILD_BUG_ON(sizeof_field(struct nf_conn, status) > + sizeof_field(struct bpf_nf_conn, status)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct nf_conn, status), + si->dst_reg, si->src_reg, + offsetof(struct nf_conn, status)); + break; + } + + return insn - insn_buf; +} +#endif + static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 00ac7b79cddb..0c2cd955f5e0 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -596,6 +596,7 @@ class PrinterHelpers(Printer): 'struct socket', 'struct file', 'struct bpf_timer', + 'struct bpf_nf_conn', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index efb2750f39c6..a10a44c4f79b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5472,6 +5472,10 @@ struct bpf_xdp_sock { __u32 queue_id; }; +struct bpf_nf_conn { + __u64 status; +}; + #define XDP_PACKET_HEADROOM 256 /* User return codes for XDP prog type. -- 2.30.2