On 11/20/22 10:33 AM, Alexei Starovoitov wrote:
On Sun, Nov 20, 2022 at 08:15:22AM -0800, Yonghong Song wrote:Implement bpf_cast_to_kern_ctx() kfunc which does a type cast of a uapi ctx object to the corresponding kernel ctx. Previously if users want to access some data available in kctx but not in uapi ctx, bpf_probe_read_kernel() helper is needed. The introduction of bpf_cast_to_kern_ctx() allows direct memory access which makes code simpler and easier to understand. Signed-off-by: Yonghong Song <yhs@xxxxxx> --- include/linux/btf.h | 5 +++++ kernel/bpf/btf.c | 25 +++++++++++++++++++++++++ kernel/bpf/helpers.c | 6 ++++++ kernel/bpf/verifier.c | 21 +++++++++++++++++++++ 4 files changed, 57 insertions(+) diff --git a/include/linux/btf.h b/include/linux/btf.h index d5b26380a60f..4b5d799f5d02 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -470,6 +470,7 @@ const struct btf_member * btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, enum bpf_prog_type prog_type, int arg); +int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type); bool btf_types_are_same(const struct btf *btf1, u32 id1, const struct btf *btf2, u32 id2); #else @@ -514,6 +515,10 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, { return NULL; } +static inline int get_kern_ctx_btf_id(struct bpf_verifier_log *log, + enum bpf_prog_type prog_type) { + return -EINVAL; +} static inline bool btf_types_are_same(const struct btf *btf1, u32 id1, const struct btf *btf2, u32 id2) { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 0a3abbe56c5d..bef1b6cfe6b8 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5603,6 +5603,31 @@ static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, return kern_ctx_type->type; }+int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type)+{ + const struct btf_member *kctx_member; + const struct btf_type *conv_struct; + const struct btf_type *kctx_type; + u32 kctx_type_id; 
+ + conv_struct = bpf_ctx_convert.t; + if (!conv_struct) { + bpf_log(log, "btf_vmlinux is malformed\n"); + return -EINVAL; + }

If we get to this point, this internal pointer has already been checked. No need to check it again. Just use it.
This is probably not true. Currently, conv_struct is tested in function btf_get_prog_ctx_type() which is called by get_kfunc_ptr_arg_type().
const struct btf_member * btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,const struct btf_type *t, enum bpf_prog_type prog_type,
int arg) { const struct btf_type *conv_struct; const struct btf_type *ctx_struct; const struct btf_member *ctx_type; const char *tname, *ctx_tname; conv_struct = bpf_ctx_convert.t; if (!conv_struct) { bpf_log(log, "btf_vmlinux is malformed\n"); return NULL; } ... } In get_kfunc_ptr_arg_type(), .../* In this function, we verify the kfunc's BTF as per the argument type, * leaving the rest of the verification with respect to the register * type to our caller. When a set of conditions hold in the BTF type of
* arguments, we resolve it to a known kfunc_ptr_arg_type. */if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
return KF_ARG_PTR_TO_CTX;

Note that if bpf_ctx_convert.t is NULL, btf_get_prog_ctx_type() simply returns NULL and the logic simply follows through. Should we actually add a NULL check for bpf_ctx_convert.t in btf_parse_vmlinux? ... err = btf_check_type_tags(env, btf, 1); if (err) goto errout; /* btf_parse_vmlinux() runs under bpf_verifier_lock */ bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]); bpf_struct_ops_init(btf, log); ...
+ + /* get member for kernel ctx type */ + kctx_member = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2 + 1; + kctx_type_id = kctx_member->type; + kctx_type = btf_type_by_id(btf_vmlinux, kctx_type_id); + if (!btf_type_is_struct(kctx_type)) { + bpf_log(log, "kern ctx type id %u is not a struct\n", kctx_type_id); + return -EINVAL; + } + + return kctx_type_id; +} + BTF_ID_LIST(bpf_ctx_convert_btf_id) BTF_ID(struct, bpf_ctx_convert)diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.cindex eaae7f474eda..dc6e994feeb9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1824,6 +1824,11 @@ struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) return __bpf_list_del(head, true); }+void *bpf_cast_to_kern_ctx(void *obj)+{ + return obj; +} + __diag_pop();BTF_SET8_START(generic_btf_ids)@@ -1844,6 +1849,7 @@ static const struct btf_kfunc_id_set generic_kfunc_set = { };BTF_SET8_START(common_btf_ids)+BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) BTF_SET8_END(common_btf_ids)static const struct btf_kfunc_id_set common_kfunc_set = {diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 195d24316750..a18b519c5225 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8118,6 +8118,7 @@ enum special_kfunc_type { KF_bpf_list_push_back, KF_bpf_list_pop_front, KF_bpf_list_pop_back, + KF_bpf_cast_to_kern_ctx, };BTF_SET_START(special_kfunc_set)@@ -8127,6 +8128,7 @@ BTF_ID(func, bpf_list_push_front) BTF_ID(func, bpf_list_push_back) BTF_ID(func, bpf_list_pop_front) BTF_ID(func, bpf_list_pop_back) +BTF_ID(func, bpf_cast_to_kern_ctx) BTF_SET_END(special_kfunc_set)BTF_ID_LIST(special_kfunc_list)@@ -8136,6 +8138,7 @@ BTF_ID(func, bpf_list_push_front) BTF_ID(func, bpf_list_push_back) BTF_ID(func, bpf_list_pop_front) BTF_ID(func, bpf_list_pop_back) +BTF_ID(func, bpf_cast_to_kern_ctx)static enum kfunc_ptr_arg_typeget_kfunc_ptr_arg_type(struct bpf_verifier_env *env, @@ -8149,6 +8152,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env 
*env, struct bpf_reg_state *reg = &regs[regno]; bool arg_mem_size = false;+ if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])+ return KF_ARG_PTR_TO_CTX; + /* In this function, we verify the kfunc's BTF as per the argument type, * leaving the rest of the verification with respect to the register * type to our caller. When a set of conditions hold in the BTF type of @@ -8633,6 +8639,13 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t)); return -EINVAL; } + + if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) { + ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog)); + if (ret < 0) + return -EINVAL; + meta->arg_constant.value = ret;

It's not an arg. So 'arg_constant' doesn't fit. No need to save every byte in bpf_kfunc_call_arg_meta. Let's add a new field like 'ret_btf_id'.
Okay, I can do that.
+ } break; case KF_ARG_PTR_TO_ALLOC_BTF_ID: if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { @@ -8880,6 +8893,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].btf = field->list_head.btf; regs[BPF_REG_0].btf_id = field->list_head.value_btf_id; regs[BPF_REG_0].off = field->list_head.node_offset; + } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_BTF_ID;

Let's use PTR_TO_BTF_ID | PTR_TRUSTED here. PTR_TRUSTED was just recently added (hours ago :) With that bpf_cast_to_kern_ctx() will return a trusted pointer and we will be able to pass it to kfuncs and helpers that expect valid args.
Right, will add PTR_TRUSTED in the next revision.
+ regs[BPF_REG_0].btf = desc_btf; + regs[BPF_REG_0].btf_id = meta.arg_constant.value; } else { verbose(env, "kernel function %s unhandled dynamic return type\n", meta.func_name); @@ -15130,6 +15148,9 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, insn_buf[1] = addr[1]; insn_buf[2] = *insn; *cnt = 3; + } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) { + insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); + *cnt = 1;

Nice! Important optimization. I guess we still need: +void *bpf_cast_to_kern_ctx(void *obj) +{ + return obj; +} otherwise resolve_btfids will be confused?
Right, we still need the above function definition so resolve_btfids can properly populate kfunc id for verification purpose.