On Fri, 13 Dec 2024 at 00:24, Daniel Xu <dxu@xxxxxxxxx> wrote:
>
> This commit allows progs to elide a null check on statically known map
> lookup keys. In other words, if the verifier can statically prove that
> the lookup will be in-bounds, allow the prog to drop the null check.
>
> This is useful for two reasons:
>
> 1. Large numbers of nullness checks (especially when they cannot fail)
>    unnecessarily push the prog towards BPF_COMPLEXITY_LIMIT_JMP_SEQ.
> 2. It forms a tighter contract between programmer and verifier.
>
> For (1), bpftrace is starting to make heavier use of percpu scratch
> maps. As a result, for user scripts with a large number of unrolled
> loops, we are starting to hit jump complexity verification errors.
> These percpu lookups cannot fail anyway, as we only use static key
> values. Eliding nullness probably results in less work for the
> verifier as well.
>
> For (2), percpu scratch maps are often used as a larger stack, as the
> current stack is limited to 512 bytes. In these situations, it is
> desirable for the programmer to express: "this lookup should never
> fail, and if it does, it means I messed up the code". By omitting the
> null check, the programmer can "ask" the verifier to double check the
> logic.
>
> Tests also have to be updated in sync, as the verifier becomes more
> efficient with this change. Notably, the iters.c tests had to be
> changed to use a map type that still requires null checks, as they
> exercise verifier tracking logic w.r.t. iterators.
>
> Signed-off-by: Daniel Xu <dxu@xxxxxxxxx>
> ---
>  kernel/bpf/verifier.c                         | 80 ++++++++++++++++++-
>  tools/testing/selftests/bpf/progs/iters.c     | 14 ++--
>  .../selftests/bpf/progs/map_kptr_fail.c       |  2 +-
>  .../selftests/bpf/progs/verifier_map_in_map.c |  2 +-
>  .../testing/selftests/bpf/verifier/map_kptr.c |  2 +-
>  5 files changed, 87 insertions(+), 13 deletions(-)
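
For readers following along, my reading of the contract this enables is
roughly the below (my own sketch, map/prog names invented): a constant,
in-bounds key on a (percpu) array map lets the program dereference the
lookup result directly.

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  /* hypothetical 1-entry percpu scratch map */
  struct {
          __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
          __type(key, __u32);
          __type(value, __u64);
          __uint(max_entries, 1);
  } scratch SEC(".maps");

  SEC("raw_tp")
  int scratch_user(const void *ctx)
  {
          __u32 key = 0;  /* statically known and < max_entries */
          __u64 *val;

          val = bpf_map_lookup_elem(&scratch, &key);
          /* With this series, no null check is needed here: the
           * verifier can prove key 0 is in-bounds, so val is never
           * NULL.
           */
          *val = 42;
          return 0;
  }

  char _license[] SEC("license") = "GPL";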
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 58b36cc96bd5..4947ef884a18 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -287,6 +287,7 @@ struct bpf_call_arg_meta {
>         u32 ret_btf_id;
>         u32 subprogno;
>         struct btf_field *kptr_field;
> +       s64 const_map_key;
>  };
>
>  struct bpf_kfunc_call_arg_meta {
> @@ -9163,6 +9164,53 @@ static int check_reg_const_str(struct bpf_verifier_env *env,
>         return 0;
>  }
>
> +/* Returns constant key value if possible, else -1 */
> +static s64 get_constant_map_key(struct bpf_verifier_env *env,
> +                               struct bpf_reg_state *key,
> +                               u32 key_size)
> +{
> +       struct bpf_func_state *state = func(env, key);
> +       struct bpf_reg_state *reg;
> +       int zero_size = 0;
> +       int stack_off;
> +       u8 *stype;
> +       int slot;
> +       int spi;
> +       int i;
> +
> +       if (!env->bpf_capable)
> +               return -1;
> +       if (key->type != PTR_TO_STACK)
> +               return -1;
> +       if (!tnum_is_const(key->var_off))
> +               return -1;
> +
> +       stack_off = key->off + key->var_off.value;
> +       slot = -stack_off - 1;
> +       spi = slot / BPF_REG_SIZE;
> +
> +       /* First handle precisely tracked STACK_ZERO, up to BPF_REG_SIZE */
> +       stype = state->stack[spi].slot_type;
> +       for (i = 0; i < BPF_REG_SIZE && stype[i] == STACK_ZERO; i++)
> +               zero_size++;
> +       if (zero_size == key_size)
> +               return 0;
> +
> +       if (!is_spilled_reg(&state->stack[spi]))
> +               /* Not pointer to stack */
> +               return -1;
> +
> +       reg = &state->stack[spi].spilled_ptr;
> +       if (reg->type != SCALAR_VALUE)
> +               /* Only scalars are valid array map keys */
> +               return -1;
> +       else if (!tnum_is_const(reg->var_off))
> +               /* Stack value not statically known */
> +               return -1;
> +
> +       return reg->var_off.value;
> +}
> +
>  static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
>                           struct bpf_call_arg_meta *meta,
>                           const struct bpf_func_proto *fn,
> @@ -9173,6 +9221,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
>         enum bpf_arg_type arg_type = fn->arg_type[arg];
>         enum bpf_reg_type type = reg->type;
>         u32 *arg_btf_id = NULL;
> +       u32 key_size;
>         int err = 0;
>         bool mask;
>
> @@ -9307,8 +9356,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
>                         verbose(env, "invalid map_ptr to access map->key\n");
>                         return -EACCES;
>                 }
> -               err = check_helper_mem_access(env, regno, meta->map_ptr->key_size,
> -                                             BPF_READ, false, NULL);
> +               key_size = meta->map_ptr->key_size;
> +               err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL);
> +               if (err)
> +                       return err;
> +               meta->const_map_key = get_constant_map_key(env, reg, key_size);
>                 break;
>         case ARG_PTR_TO_MAP_VALUE:
>                 if (type_may_be_null(arg_type) && register_is_null(reg))
> @@ -10833,6 +10885,21 @@ static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno
>                 state->callback_subprogno == subprogno);
>  }
>
> +/* Returns whether or not the given map type can potentially elide
> + * lookup return value nullness check. This is possible if the key
> + * is statically known.
> + */
> +static bool can_elide_value_nullness(enum bpf_map_type type)
> +{
> +       switch (type) {
> +       case BPF_MAP_TYPE_ARRAY:
> +       case BPF_MAP_TYPE_PERCPU_ARRAY:
> +               return true;
> +       default:
> +               return false;
> +       }
> +}
> +
>  static int get_helper_proto(struct bpf_verifier_env *env, int func_id,
>                             const struct bpf_func_proto **ptr)
>  {
> @@ -11199,10 +11266,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
>                                 "kernel subsystem misconfigured verifier\n");
>                         return -EINVAL;
>                 }
> +
> +               if (func_id == BPF_FUNC_map_lookup_elem &&
> +                   can_elide_value_nullness(meta.map_ptr->map_type) &&
> +                   meta.const_map_key >= 0 &&
> +                   meta.const_map_key < meta.map_ptr->max_entries)
> +                       ret_flag &= ~PTR_MAYBE_NULL;

I think we probably need mark_chain_precision applied to the constant
key to prevent pruning on it, since its concrete value is made use of
here. If that is already happening and I missed it, I think we should
at least add a comment.

For context on a similar case with tail calls, see commit cc52d9140aa9
("bpf: Fix record_func_key to perform backtracking on r3") for what
happens when this is missed.
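
Concretely, I'd expect something along these lines at the end of
get_constant_map_key(), before the concrete value is returned (untested
sketch; it assumes the bt_* backtracking helpers are usable from this
context, and it means plumbing an error return through the function):

  /* We are about to rely on the concrete value of the key, so the
   * stack slot holding it must be marked precise, otherwise state
   * pruning may merge verifier states that differ in the key's value.
   */
  bt_set_frame_slot(&env->bt, key->frameno, spi);
  err = mark_chain_precision_batch(env);
  if (err < 0)
          return err;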
> +
>                 regs[BPF_REG_0].map_ptr = meta.map_ptr;
>                 regs[BPF_REG_0].map_uid = meta.map_uid;
>                 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
> -               if (!type_may_be_null(ret_type) &&
> +               if (!type_may_be_null(ret_flag) &&
>                     btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
>                         regs[BPF_REG_0].id = ++env->id_gen;
>                 }
> diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
> index 7c969c127573..190822b2f08b 100644
> --- a/tools/testing/selftests/bpf/progs/iters.c
> +++ b/tools/testing/selftests/bpf/progs/iters.c
> @@ -524,11 +524,11 @@ int iter_subprog_iters(const void *ctx)
>  }
>
>  struct {
> -       __uint(type, BPF_MAP_TYPE_ARRAY);
> +       __uint(type, BPF_MAP_TYPE_HASH);
>         __type(key, int);
>         __type(value, int);
>         __uint(max_entries, 1000);
> -} arr_map SEC(".maps");
> +} hash_map SEC(".maps");
>
>  SEC("?raw_tp")
>  __failure __msg("invalid mem access 'scalar'")
> @@ -539,7 +539,7 @@ int iter_err_too_permissive1(const void *ctx)
>
>         MY_PID_GUARD();
>
> -       map_val = bpf_map_lookup_elem(&arr_map, &key);
> +       map_val = bpf_map_lookup_elem(&hash_map, &key);
>         if (!map_val)
>                 return 0;
>
> @@ -561,12 +561,12 @@ int iter_err_too_permissive2(const void *ctx)
>
>         MY_PID_GUARD();
>
> -       map_val = bpf_map_lookup_elem(&arr_map, &key);
> +       map_val = bpf_map_lookup_elem(&hash_map, &key);
>         if (!map_val)
>                 return 0;
>
>         bpf_repeat(1000000) {
> -               map_val = bpf_map_lookup_elem(&arr_map, &key);
> +               map_val = bpf_map_lookup_elem(&hash_map, &key);
>         }
>
>         *map_val = 123;
> @@ -585,7 +585,7 @@ int iter_err_too_permissive3(const void *ctx)
>         MY_PID_GUARD();
>
>         bpf_repeat(1000000) {
> -               map_val = bpf_map_lookup_elem(&arr_map, &key);
> +               map_val = bpf_map_lookup_elem(&hash_map, &key);
>                 found = true;
>         }
>
> @@ -606,7 +606,7 @@ int iter_tricky_but_fine(const void *ctx)
>         MY_PID_GUARD();
>
>         bpf_repeat(1000000) {
> -               map_val = bpf_map_lookup_elem(&arr_map, &key);
> +               map_val = bpf_map_lookup_elem(&hash_map, &key);
>                 if (map_val) {
>                         found = true;
>                         break;
> diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
> index c2a6bd392e48..4c0ff01f1a96 100644
> --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c
> +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
> @@ -345,7 +345,7 @@ int reject_indirect_global_func_access(struct __sk_buff *ctx)
>  }
>
>  SEC("?tc")
> -__failure __msg("Unreleased reference id=5 alloc_insn=")
> +__failure __msg("Unreleased reference id=4 alloc_insn=")
>  int kptr_xchg_ref_state(struct __sk_buff *ctx)
>  {
>         struct prog_test_ref_kfunc *p;
> diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
> index 4eaab1468eb7..7d088ba99ea5 100644
> --- a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
> +++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
> @@ -47,7 +47,7 @@ l0_%=: r0 = 0;                                          \
>
>  SEC("xdp")
>  __description("map in map state pruning")
> -__success __msg("processed 26 insns")
> +__success __msg("processed 15 insns")
>  __log_level(2) __retval(0) __flag(BPF_F_TEST_STATE_FREQ)
>  __naked void map_in_map_state_pruning(void)
>  {
> diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c
> index f420c0312aa0..4b39f8472f9b 100644
> --- a/tools/testing/selftests/bpf/verifier/map_kptr.c
> +++ b/tools/testing/selftests/bpf/verifier/map_kptr.c
> @@ -373,7 +373,7 @@
>         .prog_type = BPF_PROG_TYPE_SCHED_CLS,
>         .fixup_map_kptr = { 1 },
>         .result = REJECT,
> -       .errstr = "Unreleased reference id=5 alloc_insn=20",
> +       .errstr = "Unreleased reference id=4 alloc_insn=20",
>         .fixup_kfunc_btf_id = {
>                 { "bpf_kfunc_call_test_acquire", 15 },
>         }
> --
> 2.46.0
>
>
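
One more thought: it would be good for the selftests to also cover the
boundary condition, e.g. something like this (sketch, untested, reusing
the hypothetical scratch map from my earlier example) should still
require the null check, since the elision only fires for const_map_key
< max_entries:

  __u32 key = 1;  /* == max_entries, i.e. out of bounds */
  __u64 *val;

  val = bpf_map_lookup_elem(&scratch, &key);
  if (!val)  /* still mandatory: this lookup can (and will) return NULL */
          return 0;
  *val = 42;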