We introduce a new style of kfunc helpers, namely *_kptr_get, where they take pointer to the map value which points to a referenced kernel pointer contained in the map. Since this is referenced, only BPF_XCHG from kernel and BPF side is allowed to change the current value, and each pointer that resides in that location would be referenced, and RCU protected (must be kept in mind while adding kernel types embeddable as reference kptr in BPF maps). This means that if do the load of the pointer value in an RCU read section, and find a live pointer, then as long as we hold RCU read lock, it won't be freed by a parallel xchg + release operation. This allows us to implement a safe refcount increment scheme. Hence, enforce that first argument of all such kfunc is a proper PTR_TO_MAP_VALUE pointing at the right offset to referenced pointer. For the rest of the arguments, they are subjected to typical kfunc argument checks, hence allowing some flexibility in passing more intent into how the reference should be taken. For instance, in case of struct nf_conn, it is not freed until all RCU grace period ends, but can still be reused for another tuple once refcount has dropped to zero. Hence, a bpf_ct_kptr_get helper not only needs to call refcount_inc_not_zero, but also do a tuple match after incrementing the reference, and when it fails to match it, put the reference again and return NULL. This can be implemented easily if we allow passing additional parameters to the bpf_ct_kptr_get kfunc, like a struct bpf_sock_tuple * and a tuple__sz pair. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx> --- include/linux/btf.h | 2 ++ kernel/bpf/btf.c | 48 +++++++++++++++++++++++++++++++++++++++++-- kernel/bpf/verifier.c | 9 ++++---- 3 files changed, 53 insertions(+), 6 deletions(-) diff --git a/include/linux/btf.h b/include/linux/btf.h index c7e75be9637f..10918ac0e55f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -17,6 +17,7 @@ enum btf_kfunc_type { BTF_KFUNC_TYPE_ACQUIRE, BTF_KFUNC_TYPE_RELEASE, BTF_KFUNC_TYPE_RET_NULL, + BTF_KFUNC_TYPE_KPTR_ACQUIRE, BTF_KFUNC_TYPE_MAX, }; @@ -36,6 +37,7 @@ struct btf_kfunc_id_set { struct btf_id_set *acquire_set; struct btf_id_set *release_set; struct btf_id_set *ret_null_set; + struct btf_id_set *kptr_acquire_set; }; struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX]; }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index f322967da54b..1d112db4c124 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6034,11 +6034,11 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, struct bpf_verifier_log *log = &env->log; u32 i, nargs, ref_id, ref_obj_id = 0; bool is_kfunc = btf_is_kernel(btf); + bool rel = false, kptr_get = false; const char *func_name, *ref_tname; const struct btf_type *t, *ref_t; const struct btf_param *args; int ref_regno = 0; - bool rel = false; t = btf_type_by_id(btf, func_id); if (!t || !btf_type_is_func(t)) { @@ -6064,6 +6064,9 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return -EINVAL; } + if (is_kfunc) + kptr_get = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog), + BTF_KFUNC_TYPE_KPTR_ACQUIRE, func_id); /* check that BTF function arguments match actual types that the * verifier sees. */ @@ -6087,7 +6090,48 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); ref_tname = btf_name_by_offset(btf, ref_t->name_off); - if (btf_get_prog_ctx_type(log, btf, t, + if (i == 0 && is_kfunc && kptr_get) { + struct bpf_map_value_off_desc *off_desc; + + if (reg->type != PTR_TO_MAP_VALUE) { + bpf_log(log, "arg#0 expected pointer to map value, but got %s\n", + btf_type_str(t)); + return -EINVAL; + } + + if (!tnum_is_const(reg->var_off)) { + bpf_log(log, "arg#0 cannot have variable offset\n"); + return -EINVAL; + } + + off_desc = bpf_map_ptr_off_contains(reg->map_ptr, reg->off + reg->var_off.value); + if (!off_desc || !(off_desc->flags & BPF_MAP_VALUE_OFF_F_REF)) { + bpf_log(log, "arg#0 no referenced pointer at map value offset=%llu\n", + reg->off + reg->var_off.value); + return -EINVAL; + } + + if (!btf_type_is_ptr(ref_t)) { + bpf_log(log, "arg#0 type must be a double pointer\n"); + return -EINVAL; + } + + ref_t = btf_type_skip_modifiers(btf, ref_t->type, &ref_id); + ref_tname = btf_name_by_offset(btf, ref_t->name_off); + + if (!btf_type_is_struct(ref_t)) { + bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n", + func_name, i, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + if (!btf_struct_ids_match(log, btf, ref_id, 0, off_desc->btf, off_desc->btf_id)) { + bpf_log(log, "kernel function %s args#%d expected pointer to %s %s\n", + func_name, i, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + + /* rest of the arguments can be anything, like normal kfunc */ + } else if (btf_get_prog_ctx_type(log, btf, t, env->prog->type, i)) { /* If function expects ctx type in BTF check that caller * is passing PTR_TO_CTX. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0a2cd21d9ec1..a4ff951ea46f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3657,11 +3657,12 @@ static int check_map_ptr_to_btf_id(struct bpf_verifier_env *env, u32 regno, int } else if (insn_class == BPF_LDX) { if (WARN_ON_ONCE(value_regno < 0)) return -EFAULT; + /* We allow loading referenced pointer, but mark it as + * untrusted. User needs to use a kptr_get helper to obtain a + * trusted refcounted PTR_TO_BTF_ID by passing in the map + * value pointing to the referenced pointer. + */ val_reg = reg_state(env, value_regno); - if (ref_ptr) { - verbose(env, "referenced btf_id pointer can only be accessed using BPF_XCHG\n"); - return -EACCES; - } /* We can simply mark the value_regno receiving the pointer * value from map as PTR_TO_BTF_ID, with the correct type. */ -- 2.35.1