The BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type of bpf programs, e.g., a->b. If "a" is a pointer pointing to kernel memory, the bpf verifier will allow the user to write code in C like a->b and the verifier will translate it to a kernel load properly. If "a" is a pointer to user memory, it is expected that the bpf developer should use the bpf_probe_read_user() helper to get the value a->b. Without utilizing BTF __user tagging information, the current verifier will assume that a->b is a kernel memory access and this may generate an incorrect result. Now that BTF contains __user information, the verifier can check whether the pointer points to user memory or not. If it does, the verifier can reject the program and force users to use the bpf_probe_read_user() helper explicitly. Currently, an enum type is used to define the __user address space: enum btf_addr_space { BTF_ADDRSPACE_UNSPEC = 0, BTF_ADDRSPACE_USER = 1, } In the future, we can easily extend btf_addr_space for other address space tagging, for example, rcu. Signed-off-by: Yonghong Song <yhs@xxxxxx> --- include/linux/bpf.h | 12 ++++++++-- include/linux/bpf_verifier.h | 1 + include/linux/btf.h | 5 +++++ kernel/bpf/btf.c | 40 +++++++++++++++++++++++++++------- kernel/bpf/verifier.c | 35 +++++++++++++++++++++-------- net/bpf/bpf_dummy_struct_ops.c | 6 +++-- net/ipv4/bpf_tcp_ca.c | 6 +++-- 7 files changed, 82 insertions(+), 23 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8bbf08fbab66..3a18b7b980d4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -463,6 +463,12 @@ enum bpf_reg_type { __BPF_REG_TYPE_MAX, }; +/* The pointee address space encoded in BTF. */ +enum btf_addr_space { + BTF_ADDRSPACE_UNSPEC = 0, + BTF_ADDRSPACE_USER = 1, +}; + /* The information passed from prog-specific *_is_valid_access * back to the verifier. 
*/ @@ -473,6 +479,7 @@ struct bpf_insn_access_aux { struct { struct btf *btf; u32 btf_id; + enum btf_addr_space addr_space; }; }; struct bpf_verifier_log *log; /* for verbose logs */ @@ -519,7 +526,8 @@ struct bpf_verifier_ops { const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id); + u32 *next_btf_id, + enum btf_addr_space *addr_space); bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner); }; @@ -1701,7 +1709,7 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size, int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id); + u32 *next_btf_id, enum btf_addr_space *addr_space); bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, const struct btf *need_btf, u32 need_type_id); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 182b16a91084..698b9837334d 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -66,6 +66,7 @@ struct bpf_reg_state { struct { struct btf *btf; u32 btf_id; + enum btf_addr_space addr_space; }; u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */ diff --git a/include/linux/btf.h b/include/linux/btf.h index acef6ef28768..56a1074155d9 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -216,6 +216,11 @@ static inline bool btf_type_is_var(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_VAR; } +static inline bool btf_type_is_type_tag(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG; +} + /* union is only a special case of struct: * all its offsetof(member) == 0 */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 27b7de538697..831445b3d97c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -4849,6 +4849,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, const char 
*tname = prog->aux->attach_func_name; struct bpf_verifier_log *log = info->log; const struct btf_param *args; + const char *tag_value; u32 nr_args, arg; int i, ret; @@ -4998,7 +4999,15 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, info->btf = btf; info->btf_id = t->type; + info->addr_space = BTF_ADDRSPACE_UNSPEC; t = btf_type_by_id(btf, t->type); + + if (btf_type_is_type_tag(t)) { + tag_value = __btf_name_by_offset(btf, t->name_off); + if (strcmp(tag_value, "user") == 0) + info->addr_space = BTF_ADDRSPACE_USER; + } + /* skip modifiers */ while (btf_type_is_modifier(t)) { info->btf_id = t->type; @@ -5010,8 +5019,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, tname, arg, btf_kind_str[BTF_INFO_KIND(t->info)]); return false; } - bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n", - tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)], + bpf_log(log, "func '%s' arg%d has btf_id %d addr_space %d type %s '%s'\n", + tname, arg, info->btf_id, info->addr_space, + btf_kind_str[BTF_INFO_KIND(t->info)], __btf_name_by_offset(btf, t->name_off)); return true; } @@ -5025,12 +5035,12 @@ enum bpf_struct_walk_result { static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, - u32 *next_btf_id) + u32 *next_btf_id, enum btf_addr_space *addr_space) { u32 i, moff, mtrue_end, msize = 0, total_nelems = 0; const struct btf_type *mtype, *elem_type = NULL; const struct btf_member *member; - const char *tname, *mname; + const char *tname, *mname, *tag_value; u32 vlen, elem_id, mid; again: @@ -5214,7 +5224,8 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf, } if (btf_type_is_ptr(mtype)) { - const struct btf_type *stype; + enum btf_addr_space tmp_addr_space = BTF_ADDRSPACE_UNSPEC; + const struct btf_type *stype, *t; u32 id; if (msize != size || off != moff) { @@ -5223,9 +5234,19 @@ static int btf_struct_walk(struct bpf_verifier_log 
*log, const struct btf *btf, mname, moff, tname, off, size); return -EACCES; } + + /* check __user tag */ + t = btf_type_by_id(btf, mtype->type); + if (btf_type_is_type_tag(t)) { + tag_value = __btf_name_by_offset(btf, t->name_off); + if (strcmp(tag_value, "user") == 0) + tmp_addr_space = BTF_ADDRSPACE_USER; + } + stype = btf_type_skip_modifiers(btf, mtype->type, &id); if (btf_type_is_struct(stype)) { *next_btf_id = id; + *addr_space = tmp_addr_space; return WALK_PTR; } } @@ -5252,13 +5273,14 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf, int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype __maybe_unused, - u32 *next_btf_id) + u32 *next_btf_id, enum btf_addr_space *addr_space) { + enum btf_addr_space tmp_addr_space; int err; u32 id; do { - err = btf_struct_walk(log, btf, t, off, size, &id); + err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_addr_space); switch (err) { case WALK_PTR: @@ -5266,6 +5288,7 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, * we're done. 
*/ *next_btf_id = id; + *addr_space = tmp_addr_space; return PTR_TO_BTF_ID; case WALK_SCALAR: return SCALAR_VALUE; @@ -5309,6 +5332,7 @@ bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, const struct btf *need_btf, u32 need_type_id) { + enum btf_addr_space addr_space; const struct btf_type *type; int err; @@ -5320,7 +5344,7 @@ bool btf_struct_ids_match(struct bpf_verifier_log *log, type = btf_type_by_id(btf, id); if (!type) return false; - err = btf_struct_walk(log, btf, type, off, 1, &id); + err = btf_struct_walk(log, btf, type, off, 1, &id, &addr_space); if (err != WALK_STRUCT) return false; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1126b75fe650..b4968dcaf80f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -649,7 +649,9 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL || t == PTR_TO_PERCPU_BTF_ID) - verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id)); + verbose(env, "%s,addr_space=%d", + kernel_type_name(reg->btf, reg->btf_id), + reg->addr_space); verbose(env, "(id=%d", reg->id); if (reg_type_may_be_refcounted_or_null(t)) verbose(env, ",ref_obj_id=%d", reg->ref_obj_id); @@ -1498,7 +1500,8 @@ static void mark_reg_not_init(struct bpf_verifier_env *env, static void mark_btf_ld_reg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno, enum bpf_reg_type reg_type, - struct btf *btf, u32 btf_id) + struct btf *btf, u32 btf_id, + enum btf_addr_space addr_space) { if (reg_type == SCALAR_VALUE) { mark_reg_unknown(env, regs, regno); @@ -1508,6 +1511,7 @@ static void mark_btf_ld_reg(struct bpf_verifier_env *env, regs[regno].type = PTR_TO_BTF_ID; regs[regno].btf = btf; regs[regno].btf_id = btf_id; + regs[regno].addr_space = addr_space; } #define DEF_NOT_SUBREG (0) @@ -3553,7 +3557,7 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, /* check access to 'struct 
bpf_context' fields. Supports fixed offsets only */ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type, - struct btf **btf, u32 *btf_id) + struct btf **btf, u32 *btf_id, enum btf_addr_space *addr_space) { struct bpf_insn_access_aux info = { .reg_type = *reg_type, @@ -3574,6 +3578,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL) { *btf = info.btf; *btf_id = info.btf_id; + *addr_space = info.addr_space; } else { env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; } @@ -4099,6 +4104,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg = regs + regno; const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id); const char *tname = btf_name_by_offset(reg->btf, t->name_off); + enum btf_addr_space addr_space; u32 btf_id; int ret; @@ -4118,9 +4124,16 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, return -EACCES; } + if (reg->addr_space == BTF_ADDRSPACE_USER) { + verbose(env, + "R%d is ptr_%s access user memory: off=%d\n", + regno, tname, off); + return -EACCES; + } + if (env->ops->btf_struct_access) { ret = env->ops->btf_struct_access(&env->log, reg->btf, t, - off, size, atype, &btf_id); + off, size, atype, &btf_id, &addr_space); } else { if (atype != BPF_READ) { verbose(env, "only read is supported\n"); @@ -4128,14 +4141,14 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, } ret = btf_struct_access(&env->log, reg->btf, t, off, size, - atype, &btf_id); + atype, &btf_id, &addr_space); } if (ret < 0) return ret; if (atype == BPF_READ && value_regno >= 0) - mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id); + mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, addr_space); return 0; } @@ -4148,6 +4161,7 @@ static int check_ptr_to_map_access(struct 
bpf_verifier_env *env, { struct bpf_reg_state *reg = regs + regno; struct bpf_map *map = reg->map_ptr; + enum btf_addr_space addr_space; const struct btf_type *t; const char *tname; u32 btf_id; @@ -4185,12 +4199,12 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, return -EACCES; } - ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id); + ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &addr_space); if (ret < 0) return ret; if (value_regno >= 0) - mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id); + mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, addr_space); return 0; } @@ -4362,6 +4376,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); } else if (reg->type == PTR_TO_CTX) { + enum btf_addr_space addr_space = BTF_ADDRSPACE_UNSPEC; enum bpf_reg_type reg_type = SCALAR_VALUE; struct btf *btf = NULL; u32 btf_id = 0; @@ -4376,7 +4391,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (err < 0) return err; - err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf, &btf_id); + err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf, + &btf_id, &addr_space); if (err) verbose_linfo(env, insn_idx, "; "); if (!err && t == BPF_READ && value_regno >= 0) { @@ -4401,6 +4417,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn reg_type == PTR_TO_BTF_ID_OR_NULL) { regs[value_regno].btf = btf; regs[value_regno].btf_id = btf_id; + regs[value_regno].addr_space = addr_space; } } regs[value_regno].type = reg_type; diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index fbc896323bec..57db75427a88 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -145,7 +145,8 @@ static int bpf_dummy_ops_btf_struct_access(struct 
bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id) + u32 *next_btf_id, + enum btf_addr_space *addr_space) { const struct btf_type *state; s32 type_id; @@ -162,7 +163,8 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log, return -EACCES; } - err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id); + err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id, + addr_space); if (err < 0) return err; diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 67466dbff152..dba60f85369b 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -95,12 +95,14 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id) + u32 *next_btf_id, + enum btf_addr_space *addr_space) { size_t end; if (atype == BPF_READ) - return btf_struct_access(log, btf, t, off, size, atype, next_btf_id); + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, + addr_space); if (t != tcp_sock_type) { bpf_log(log, "only read is supported\n"); -- 2.30.2