On Mon, Sep 25, 2023 at 3:56 AM Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx> wrote:
>
> css_iter and task_iter should be used in an RCU section. Specifically, in
> sleepable progs an explicit bpf_rcu_read_lock() is needed before using
> these iters. In normal bpf progs that have an implicit rcu_read_lock(),
> it's OK to use them directly.
>
> This patch adds a new KF flag KF_RCU_PROTECTED for bpf_iter_task_new and
> bpf_iter_css_new. It means the kfunc should be used within an RCU CS. We
> check whether we are in an RCU CS before invoking such a kfunc. If RCU
> protection is guaranteed, we set st->type = PTR_TO_STACK | MEM_RCU. Once
> the user does rcu_unlock during the iteration, the MEM_RCU state of the
> regs is cleared. is_iter_reg_valid_init() will then reject the reg since
> its type is PTR_UNTRUSTED.
>
> It is worth noting that currently bpf_rcu_read_unlock does not clear the
> state of the STACK_ITER reg, since bpf_for_each_spilled_reg only
> considers STACK_SPILL. This patch also lets bpf_for_each_spilled_reg
> search STACK_ITER.
>
> Signed-off-by: Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx>
> ---
>  include/linux/bpf_verifier.h | 19 ++++++++------
>  include/linux/btf.h          |  1 +
>  kernel/bpf/helpers.c         |  4 +--
>  kernel/bpf/verifier.c        | 48 +++++++++++++++++++++++++++---------
>  4 files changed, 50 insertions(+), 22 deletions(-)
>
> [...]
>
> -static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
> +static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
>  				   struct btf *btf, u32 btf_id, int nr_slots)
>  {
>  	struct bpf_func_state *state = func(env, reg);
> @@ -1275,26 +1286,28 @@ static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_
>
>  	spi = iter_get_spi(env, reg, nr_slots);
>  	if (spi < 0)
> -		return false;
> +		return -EINVAL;
>
>  	for (i = 0; i < nr_slots; i++) {
>  		struct bpf_stack_state *slot = &state->stack[spi - i];
>  		struct bpf_reg_state *st = &slot->spilled_ptr;
>
> +		if (st->type & PTR_UNTRUSTED)
> +			return -EPERM;
>  		/* only main (first) slot has ref_obj_id set */
>  		if (i == 0 && !st->ref_obj_id)
> -			return false;
> +			return -EINVAL;
>  		if (i != 0 && st->ref_obj_id)
> -			return false;
> +			return -EINVAL;
>  		if (st->iter.btf != btf || st->iter.btf_id != btf_id)
> -			return false;
> +			return -EINVAL;
>
>  		for (j = 0; j < BPF_REG_SIZE; j++)
>  			if (slot->slot_type[j] != STACK_ITER)
> -				return false;
> +				return -EINVAL;
>  	}
>
> -	return true;
> +	return 0;
>  }
>
>  /* Check if given stack slot is "special":
> @@ -7503,15 +7516,20 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
>  			return err;
>  		}
>
> -		err = mark_stack_slots_iter(env, reg, insn_idx, meta->btf, btf_id, nr_slots);
> +		err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots);
>  		if (err)
>  			return err;
>  	} else {
>  		/* iter_next() or iter_destroy() expect initialized iter state */
> -		if (!is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots)) {
> -			verbose(env, "expected an initialized iter_%s as arg #%d\n",
> +		err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
> +		switch (err) {
> +		case -EINVAL:

I'd also add default: here, in case we ever emit some other error from
is_iter_reg_valid_init().

> +			verbose(env, "expected an initialized iter_%s as arg #%d or without bpf_rcu_read_lock()\n",
>  				iter_type_str(meta->btf, btf_id), regno);
> -			return -EINVAL;
> +			return err;
> +		case -EPERM:

I find -EPERM a bit confusing. Let's use something a bit more specific,
e.g., -EPROTO? We are basically not following a protocol if we don't keep
RCU-protected iterators within a single RCU region, right?
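Combining the two points, something along these lines (untested, with
-EPROTO purely as a strawman name):

		err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
		switch (err) {
		case 0:
			break;
		case -EPROTO:
			verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
			return err;
		case -EINVAL:
		default:
			verbose(env, "expected an initialized iter_%s as arg #%d or without bpf_rcu_read_lock()\n",
				iter_type_str(meta->btf, btf_id), regno);
			return err;
		}

The explicit case 0 keeps the success path obvious, and default: means any
future error from is_iter_reg_valid_init() still fails verification with a
message instead of being silently ignored by the code after the switch.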
> +			verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
> +			return err;
> +		}
>
>  		spi = iter_get_spi(env, reg, nr_slots);
> @@ -10092,6 +10110,11 @@ static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
>  	return meta->kfunc_flags & KF_RCU;
>  }
>
> +static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
> +{
> +	return meta->kfunc_flags & KF_RCU_PROTECTED;
> +}
> +
>  static bool __kfunc_param_match_suffix(const struct btf *btf,
>  				       const struct btf_param *arg,
>  				       const char *suffix)
> @@ -11428,6 +11451,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  	if (env->cur_state->active_rcu_lock) {
>  		struct bpf_func_state *state;
>  		struct bpf_reg_state *reg;
> +		u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
>
>  		if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
>  			verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
> @@ -11438,7 +11462,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  			verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
>  			return -EINVAL;
>  		} else if (rcu_unlock) {
> -			bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
> +			bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({
>  				if (reg->type & MEM_RCU) {
>  					reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
>  					reg->type |= PTR_UNTRUSTED;
> --
> 2.20.1
>
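Other than those nits, the verifier plumbing looks right to me. Just to
spell out the contract this enforces for sleepable progs, the canonical
usage should look roughly like the following (rough, untested sketch; I'm
going from memory on the kfunc signatures and the BPF_TASK_ITER_* flag
names added earlier in this series):

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

/* kfuncs used below; the iterator ones are added by this series */
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
int bpf_iter_task_new(struct bpf_iter_task *it,
		      struct task_struct *task, unsigned int flags) __ksym;
struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) __ksym;
void bpf_iter_task_destroy(struct bpf_iter_task *it) __ksym;

char _license[] SEC("license") = "GPL";

SEC("fentry.s/security_file_open") /* sleepable, so no implicit RCU CS */
int iter_all_procs(void *ctx)
{
	struct task_struct *pos, *cur = bpf_get_current_task_btf();
	struct bpf_iter_task it;

	bpf_rcu_read_lock();
	/* _new() is only accepted here because we are inside an RCU CS */
	bpf_iter_task_new(&it, cur, BPF_TASK_ITER_ALL_PROCS);
	while ((pos = bpf_iter_task_next(&it)))
		bpf_printk("pid %d", pos->pid);
	bpf_iter_task_destroy(&it);
	/* unlocking before _next()/_destroy() would flip the iter slots
	 * to PTR_UNTRUSTED and the prog would now be rejected
	 */
	bpf_rcu_read_unlock();
	return 0;
}

I.e., moving bpf_rcu_read_unlock() above the loop should now fail with the
new "expected an initialized iter_..." message, which is exactly the
behavior we want here.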