On Mon, Sep 25, 2023 at 3:56 AM Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx> wrote:
>
> css_iter and task_iter should be used in an RCU section. Specifically, in
> sleepable progs an explicit bpf_rcu_read_lock() is needed before using
> these iters. In normal bpf progs that have an implicit rcu_read_lock(),
> it's OK to use them directly.
>
> This patch adds a new KF flag KF_RCU_PROTECTED for bpf_iter_task_new and
> bpf_iter_css_new. It means the kfunc should be used within an RCU CS. We
> check whether we are in an RCU CS before invoking such a kfunc. If RCU
> protection is guaranteed, we set st->type = PTR_TO_STACK | MEM_RCU. Once
> the user does rcu_unlock during the iteration, the MEM_RCU state of the
> regs is cleared. is_iter_reg_valid_init() will then reject the reg since
> its type is PTR_UNTRUSTED.
>
> It is worth noting that currently bpf_rcu_read_unlock does not clear the
> state of the STACK_ITER reg, since bpf_for_each_spilled_reg only
> considers STACK_SPILL. This patch also lets bpf_for_each_spilled_reg
> search STACK_ITER.
>
> Signed-off-by: Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx>
> ---
>  include/linux/bpf_verifier.h | 19 ++++++++------
>  include/linux/btf.h          |  1 +
>  kernel/bpf/helpers.c         |  4 +--
>  kernel/bpf/verifier.c        | 48 +++++++++++++++++++++++++++---------
>  4 files changed, 50 insertions(+), 22 deletions(-)
>
> [...]
>
> -static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
> +static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
>  				   struct btf *btf, u32 btf_id, int nr_slots)
>  {
>  	struct bpf_func_state *state = func(env, reg);
> @@ -1275,26 +1286,28 @@ static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_
>
>  	spi = iter_get_spi(env, reg, nr_slots);
>  	if (spi < 0)
> -		return false;
> +		return -EINVAL;
>
>  	for (i = 0; i < nr_slots; i++) {
>  		struct bpf_stack_state *slot = &state->stack[spi - i];
>  		struct bpf_reg_state *st = &slot->spilled_ptr;
>
> +		if (st->type & PTR_UNTRUSTED)
> +			return -EPERM;
>  		/* only main (first) slot has ref_obj_id set */
>  		if (i == 0 && !st->ref_obj_id)
> -			return false;
> +			return -EINVAL;
>  		if (i != 0 && st->ref_obj_id)
> -			return false;
> +			return -EINVAL;
>  		if (st->iter.btf != btf || st->iter.btf_id != btf_id)
> -			return false;
> +			return -EINVAL;
>
>  		for (j = 0; j < BPF_REG_SIZE; j++)
>  			if (slot->slot_type[j] != STACK_ITER)
> -				return false;
> +				return -EINVAL;
>  	}
>
> -	return true;
> +	return 0;
>  }
>
>  /* Check if given stack slot is "special":
> @@ -7503,15 +7516,20 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
>  			return err;
>  		}
>
> -		err = mark_stack_slots_iter(env, reg, insn_idx, meta->btf, btf_id, nr_slots);
> +		err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots);
>  		if (err)
>  			return err;
>  	} else {
>  		/* iter_next() or iter_destroy() expect initialized iter state */
> -		if (!is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots)) {
> -			verbose(env, "expected an initialized iter_%s as arg #%d\n",
> +		err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
> +		switch (err) {
> +		case -EINVAL:

I'd also add default: here, in case we ever emit some other error from
is_iter_reg_valid_init().

> +			verbose(env, "expected an initialized iter_%s as arg #%d or without bpf_rcu_read_lock()\n",
>  				iter_type_str(meta->btf, btf_id), regno);
> -			return -EINVAL;
> +			return err;
> +		case -EPERM:

I find -EPERM a bit confusing. Let's use something a bit more specific,
e.g., -EPROTO? We are basically not following a protocol if we don't keep
RCU-protected iterators within a single RCU region, right?
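Combining the two points, something along these lines (untested, with
-EPROTO purely as a strawman name):

		err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
		switch (err) {
		case 0:
			break;
		case -EPROTO:
			verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
			return err;
		case -EINVAL:
		default:
			verbose(env, "expected an initialized iter_%s as arg #%d or without bpf_rcu_read_lock()\n",
				iter_type_str(meta->btf, btf_id), regno);
			return err;
		}

The explicit case 0 keeps the success path obvious, and default: means any
future error from is_iter_reg_valid_init() still fails verification with a
message instead of being silently ignored by the code after the switch.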
> +			verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
> +			return err;
> +		}
>
>  		spi = iter_get_spi(env, reg, nr_slots);
> @@ -10092,6 +10110,11 @@ static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
>  	return meta->kfunc_flags & KF_RCU;
>  }
>
> +static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
> +{
> +	return meta->kfunc_flags & KF_RCU_PROTECTED;
> +}
> +
>  static bool __kfunc_param_match_suffix(const struct btf *btf,
>  				       const struct btf_param *arg,
>  				       const char *suffix)
> @@ -11428,6 +11451,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  	if (env->cur_state->active_rcu_lock) {
>  		struct bpf_func_state *state;
>  		struct bpf_reg_state *reg;
> +		u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
>
>  		if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
>  			verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
> @@ -11438,7 +11462,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  			verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
>  			return -EINVAL;
>  		} else if (rcu_unlock) {
> -			bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
> +			bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({
>  				if (reg->type & MEM_RCU) {
>  					reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
>  					reg->type |= PTR_UNTRUSTED;
> --
> 2.20.1
>
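Other than those nits, the verifier plumbing looks right to me. Just to
spell out the contract this enforces for sleepable progs, the canonical
usage should look roughly like the following (rough, untested sketch; I'm
going from memory on the kfunc signatures and the BPF_TASK_ITER_* flag
names added earlier in this series):

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

/* kfuncs used below; the iterator ones are added by this series */
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
int bpf_iter_task_new(struct bpf_iter_task *it,
		      struct task_struct *task, unsigned int flags) __ksym;
struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) __ksym;
void bpf_iter_task_destroy(struct bpf_iter_task *it) __ksym;

char _license[] SEC("license") = "GPL";

SEC("fentry.s/security_file_open") /* sleepable, so no implicit RCU CS */
int iter_all_procs(void *ctx)
{
	struct task_struct *pos, *cur = bpf_get_current_task_btf();
	struct bpf_iter_task it;

	bpf_rcu_read_lock();
	/* _new() is only accepted here because we are inside an RCU CS */
	bpf_iter_task_new(&it, cur, BPF_TASK_ITER_ALL_PROCS);
	while ((pos = bpf_iter_task_next(&it)))
		bpf_printk("pid %d", pos->pid);
	bpf_iter_task_destroy(&it);
	/* unlocking before _next()/_destroy() would flip the iter slots
	 * to PTR_UNTRUSTED and the prog would now be rejected
	 */
	bpf_rcu_read_unlock();
	return 0;
}

I.e., moving bpf_rcu_read_unlock() above the loop should now fail with the
new "expected an initialized iter_..." message, which is exactly the
behavior we want here.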