On Mon, Nov 4, 2024 at 7:17 AM Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx> wrote:
>
> When bpf_spin_lock was introduced originally, there was deliberation on
> whether to use an array of lock IDs, but since bpf_spin_lock is limited
> to holding a single lock at any given time, we've been using a single ID
> to identify the held lock.
>
> In preparation for introducing spin locks that can be taken multiple
> times, introduce support for acquiring multiple lock IDs. For this
> purpose, reuse the acquired_refs array and store both lock and pointer
> references. We tag the entry with REF_TYPE_PTR or REF_TYPE_BPF_LOCK to
> disambiguate and find the relevant entry. The ptr field is used to track
> the map_ptr or btf (for bpf_obj_new allocations) to ensure locks can be
> matched with protected fields within the same "allocation", i.e.
> bpf_obj_new object or map value.
>
> The struct active_lock is changed to a boolean as the state is part of
> the acquired_refs array, and we only need active_lock as a cheap way
> of detecting lock presence.
>
> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx>
> ---
> Changelog:
> v2 -> v3
> v2: https://lore.kernel.org/bpf/20241103212252.547071-1-memxor@xxxxxxxxx
>
> * Rebase on bpf-next to resolve merge conflict
>
> v1 -> v2
> v1: https://lore.kernel.org/bpf/20241103205856.345580-1-memxor@xxxxxxxxx
>
> * Fix refsafe state comparison to check callback_ref and ptr separately.
> ---
>  include/linux/bpf_verifier.h |  34 ++++++---
>  kernel/bpf/verifier.c        | 138 ++++++++++++++++++++++++++---------
>  2 files changed, 126 insertions(+), 46 deletions(-)
>
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index 4513372c5bc8..1e7e1803d78b 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -266,6 +266,10 @@ struct bpf_stack_state {
>  };
>
>  struct bpf_reference_state {
> +	/* Each reference object has a type. Ensure REF_TYPE_PTR is zero to
> +	 * default to pointer reference on zero initialization of a state.
> +	 */
> +	enum { REF_TYPE_PTR = 0, REF_TYPE_BPF_LOCK } type;

In a future commit two more values will be added, right?
That may push this line over the length limit.
I think it's cleaner to put each value on a separate line:

enum ref_state_type {
	REF_TYPE_PTR = 0,
	REF_TYPE_BPF_LOCK
} type;

easier to read and less churn.
Also name that enum type so it can be used later.
Also I'd drop _BPF_ from the middle. Just REF_TYPE_LOCK would do.
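Concretely, a named enum could be declared once and reused by the new
helpers later in this patch. A sketch only, with the suggested
REF_TYPE_LOCK rename applied; the prototypes mirror the helpers added
below with 'int type' swapped for the enum:

enum ref_state_type {
	/* Keep REF_TYPE_PTR zero so a zero-initialized state defaults to it. */
	REF_TYPE_PTR = 0,
	REF_TYPE_LOCK,
};

/* This patch's helpers, retyped to take the named enum: */
static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx,
			      enum ref_state_type type, int id, void *ptr);
static int release_lock_state(struct bpf_func_state *state,
			      enum ref_state_type type, int id, void *ptr);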
>  	/* Track each reference created with a unique id, even if the same
>  	 * instruction creates the reference multiple times (eg, via CALL).
>  	 */
> @@ -274,17 +278,23 @@ struct bpf_reference_state {
>  	 * is used purely to inform the user of a reference leak.
>  	 */
>  	int insn_idx;
> -	/* There can be a case like:
> -	 * main (frame 0)
> -	 *  cb (frame 1)
> -	 *   func (frame 3)
> -	 *    cb (frame 4)
> -	 * Hence for frame 4, if callback_ref just stored boolean, it would be
> -	 * impossible to distinguish nested callback refs. Hence store the
> -	 * frameno and compare that to callback_ref in check_reference_leak when
> -	 * exiting a callback function.
> -	 */
> -	int callback_ref;
> +	union {
> +		/* There can be a case like:
> +		 * main (frame 0)
> +		 *  cb (frame 1)
> +		 *   func (frame 3)
> +		 *    cb (frame 4)
> +		 * Hence for frame 4, if callback_ref just stored boolean, it would be
> +		 * impossible to distinguish nested callback refs. Hence store the
> +		 * frameno and compare that to callback_ref in check_reference_leak when
> +		 * exiting a callback function.
> +		 */
> +		int callback_ref;
> +		/* Use to keep track of the source object of a lock, to ensure
> +		 * it matches on unlock.
> +		 */
> +		void *ptr;
> +	};
>  };
>
>  struct bpf_retval_range {
> @@ -434,7 +444,7 @@ struct bpf_verifier_state {
>  	u32 insn_idx;
>  	u32 curframe;
>
> -	struct bpf_active_lock active_lock;

remove struct bpf_active_lock as well.

> +	bool active_lock;

In the next patch it becomes 'int', so let's make it 'int' right away
and move it to bpf_func_state next to:

	int acquired_refs;
	struct bpf_reference_state *refs;

since it's counting locks in this array.
'bool' is just a weird form of 1 or 0, so 'int' is cleaner in this patch too.
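Under that suggestion the layout could end up roughly like this. A
sketch only; the field name active_locks and its comment are guesses,
only the placement next to acquired_refs/refs is from the review:

struct bpf_func_state {
	/* ... other fields unchanged ... */

	int acquired_refs;
	/* Counts REF_TYPE_LOCK entries in refs; replaces the bool in
	 * bpf_verifier_state and becomes a real counter in the next patch.
	 */
	int active_locks;
	struct bpf_reference_state *refs;
};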
>  	bool speculative;
>  	bool active_rcu_lock;
>  	u32 active_preempt_lock;
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index ba800c7611e3..ea8ad320e6cc 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -1335,6 +1335,7 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
>  	if (err)
>  		return err;
>  	id = ++env->id_gen;
> +	state->refs[new_ofs].type = REF_TYPE_PTR;
>  	state->refs[new_ofs].id = id;
>  	state->refs[new_ofs].insn_idx = insn_idx;
>  	state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0;
> @@ -1342,6 +1343,23 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
>  	return id;
>  }
>
> +static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, int type, int id, void *ptr)

enum ref_state_type instead of 'int type' ?

> +{
> +	struct bpf_func_state *state = cur_func(env);
> +	int new_ofs = state->acquired_refs;
> +	int err;
> +
> +	err = resize_reference_state(state, state->acquired_refs + 1);
> +	if (err)
> +		return err;
> +	state->refs[new_ofs].type = type;
> +	state->refs[new_ofs].id = id;
> +	state->refs[new_ofs].insn_idx = insn_idx;
> +	state->refs[new_ofs].ptr = ptr;
> +
> +	return 0;
> +}
> +
>  /* release function corresponding to acquire_reference_state(). Idempotent. */
>  static int release_reference_state(struct bpf_func_state *state, int ptr_id)
>  {
> @@ -1349,6 +1367,8 @@ static int release_reference_state(struct bpf_func_state *state, int ptr_id)
>
>  	last_idx = state->acquired_refs - 1;
>  	for (i = 0; i < state->acquired_refs; i++) {
> +		if (state->refs[i].type != REF_TYPE_PTR)
> +			continue;
>  		if (state->refs[i].id == ptr_id) {
>  			/* Cannot release caller references in callbacks */
>  			if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
> @@ -1364,6 +1384,43 @@ static int release_reference_state(struct bpf_func_state *state, int ptr_id)
>  	return -EINVAL;
>  }
>
> +static int release_lock_state(struct bpf_func_state *state, int type, int id, void *ptr)
> +{
> +	int i, last_idx;
> +
> +	last_idx = state->acquired_refs - 1;
> +	for (i = 0; i < state->acquired_refs; i++) {
> +		if (state->refs[i].type != type)
> +			continue;
> +		if (state->refs[i].id == id && state->refs[i].ptr == ptr) {
> +			if (last_idx && i != last_idx)
> +				memcpy(&state->refs[i], &state->refs[last_idx],
> +				       sizeof(*state->refs));
> +			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
> +			state->acquired_refs--;
> +			return 0;
> +		}
> +	}
> +	return -EINVAL;
> +}
> +
> +static struct bpf_reference_state *find_lock_state(struct bpf_verifier_env *env, int id, void *ptr)
> +{
> +	struct bpf_func_state *state = cur_func(env);
> +	int i;
> +
> +	for (i = 0; i < state->acquired_refs; i++) {
> +		struct bpf_reference_state *s = &state->refs[i];
> +
> +		if (s->type == REF_TYPE_PTR)
> +			continue;

Let's pass 'enum ref_state_type type' and compare here to make it
similar to release_lock_state(). I think it will clean up future
patches too.

> +
> +		if (s->id == id && s->ptr == ptr)
> +			return s;
> +	}
> +	return NULL;
> +}
> +
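Applying that suggestion, the helper could look like the following
(an untested sketch of the suggested shape):

static struct bpf_reference_state *find_lock_state(struct bpf_verifier_env *env,
						   enum ref_state_type type,
						   int id, void *ptr)
{
	struct bpf_func_state *state = cur_func(env);
	int i;

	for (i = 0; i < state->acquired_refs; i++) {
		struct bpf_reference_state *s = &state->refs[i];

		/* Match the requested type, mirroring release_lock_state(). */
		if (s->type != type)
			continue;
		if (s->id == id && s->ptr == ptr)
			return s;
	}
	return NULL;
}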
>  static void free_func_state(struct bpf_func_state *state)
>  {
>  	if (!state)
> @@ -1430,12 +1487,11 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
>  		dst_state->frame[i] = NULL;
>  	}
>  	dst_state->speculative = src->speculative;
> +	dst_state->active_lock = src->active_lock;
>  	dst_state->active_rcu_lock = src->active_rcu_lock;
>  	dst_state->active_preempt_lock = src->active_preempt_lock;
>  	dst_state->in_sleepable = src->in_sleepable;
>  	dst_state->curframe = src->curframe;
> -	dst_state->active_lock.ptr = src->active_lock.ptr;
> -	dst_state->active_lock.id = src->active_lock.id;
>  	dst_state->branches = src->branches;
>  	dst_state->parent = src->parent;
>  	dst_state->first_insn_idx = src->first_insn_idx;
> @@ -5423,7 +5479,7 @@ static bool in_sleepable(struct bpf_verifier_env *env)
>  static bool in_rcu_cs(struct bpf_verifier_env *env)
>  {
>  	return env->cur_state->active_rcu_lock ||
> -	       env->cur_state->active_lock.ptr ||
> +	       env->cur_state->active_lock ||
>  	       !in_sleepable(env);
>  }
>
> @@ -7698,6 +7754,7 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
>  	struct bpf_map *map = NULL;
>  	struct btf *btf = NULL;
>  	struct btf_record *rec;
> +	int err;
>
>  	if (!is_const) {
>  		verbose(env,
> @@ -7729,16 +7786,27 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
>  		return -EINVAL;
>  	}
>  	if (is_lock) {
> -		if (cur->active_lock.ptr) {
> +		void *ptr;
> +
> +		if (map)
> +			ptr = map;
> +		else
> +			ptr = btf;
> +
> +		if (cur->active_lock) {
>  			verbose(env,
>  				"Locking two bpf_spin_locks are not allowed\n");
>  			return -EINVAL;
>  		}
> -		if (map)
> -			cur->active_lock.ptr = map;
> -		else
> -			cur->active_lock.ptr = btf;
> -		cur->active_lock.id = reg->id;
> +		err = acquire_lock_state(env, env->insn_idx,
> +					 REF_TYPE_BPF_LOCK, reg->id, ptr);
> +		if (err < 0) {
> +			verbose(env, "Failed to acquire lock state\n");
> +			return err;
> +		}
> +		/* It is not safe to allow multiple bpf_spin_lock calls, so
> +		 * disallow them until this lock has been unlocked.
> +		 */
> +		cur->active_lock = true;
>  	} else {
>  		void *ptr;
>
> @@ -7747,20 +7815,18 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
>  		else
>  			ptr = btf;
>
> -		if (!cur->active_lock.ptr) {
> +		if (!cur->active_lock) {
>  			verbose(env, "bpf_spin_unlock without taking a lock\n");
>  			return -EINVAL;
>  		}
> -		if (cur->active_lock.ptr != ptr ||
> -		    cur->active_lock.id != reg->id) {
> +
> +		if (release_lock_state(cur_func(env), REF_TYPE_BPF_LOCK, reg->id, ptr)) {
>  			verbose(env, "bpf_spin_unlock of different lock\n");
>  			return -EINVAL;
>  		}
>
>  		invalidate_non_owning_refs(env);
> -
> -		cur->active_lock.ptr = NULL;
> -		cur->active_lock.id = 0;
> +		cur->active_lock = false;
>  	}
>  	return 0;
>  }
> @@ -9818,7 +9884,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  		const char *sub_name = subprog_name(env, subprog);
>
>  		/* Only global subprogs cannot be called with a lock held. */
> -		if (env->cur_state->active_lock.ptr) {
> +		if (env->cur_state->active_lock) {
>  			verbose(env, "global function calls are not allowed while holding a lock,\n"
>  				     "use static function instead\n");
>  			return -EINVAL;
> @@ -10343,6 +10409,8 @@ static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exi
>  		return 0;
>
>  	for (i = 0; i < state->acquired_refs; i++) {
> +		if (state->refs[i].type != REF_TYPE_PTR)
> +			continue;

why?
check_reference_leak() should complain about unreleased locks too.

>  		if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
>  			continue;
>  		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
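A sketch of how the loop could report held locks instead of skipping
them (refs_lingering and the lock message are assumptions about the
surrounding function body, which the quoted hunk elides):

	for (i = 0; i < state->acquired_refs; i++) {
		/* Lock entries keep the owner in the 'ptr' union member, so
		 * the callback_ref check below only applies to REF_TYPE_PTR.
		 */
		if (state->refs[i].type != REF_TYPE_PTR) {
			verbose(env, "Unreleased lock id=%d alloc_insn=%d\n",
				state->refs[i].id, state->refs[i].insn_idx);
			refs_lingering = true;
			continue;
		}
		if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
			continue;
		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
			state->refs[i].id, state->refs[i].insn_idx);
		refs_lingering = true;
	}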
> @@ -10356,7 +10424,7 @@ static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit
>  {
>  	int err;
>
> -	if (check_lock && env->cur_state->active_lock.ptr) {
> +	if (check_lock && env->cur_state->active_lock) {
>  		verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix);
>  		return -EINVAL;
>  	}
> @@ -11580,7 +11648,7 @@ static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state
>  	struct bpf_verifier_state *state = env->cur_state;
>  	struct btf_record *rec = reg_btf_record(reg);
>
> -	if (!state->active_lock.ptr) {
> +	if (!state->active_lock) {
>  		verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
>  		return -EFAULT;
>  	}
> @@ -11677,6 +11745,7 @@ static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_o
>   */
>  static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
>  {
> +	struct bpf_reference_state *s;
>  	void *ptr;
>  	u32 id;
>
> @@ -11693,10 +11762,10 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_
>  	}
>  	id = reg->id;
>
> -	if (!env->cur_state->active_lock.ptr)
> +	if (!env->cur_state->active_lock)
>  		return -EINVAL;
> -	if (env->cur_state->active_lock.ptr != ptr ||
> -	    env->cur_state->active_lock.id != id) {
> +	s = find_lock_state(env, id, ptr);
> +	if (!s) {
>  		verbose(env, "held lock and object are not in the same allocation\n");
>  		return -EINVAL;
>  	}
> @@ -17561,8 +17630,19 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
>  		return false;
>
>  	for (i = 0; i < old->acquired_refs; i++) {
> -		if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap))
> +		if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) ||
> +		    old->refs[i].type != cur->refs[i].type)
>  			return false;
> +		switch (old->refs[i].type) {
> +		case REF_TYPE_PTR:
> +			if (old->refs[i].callback_ref != cur->refs[i].callback_ref)
> +				return false;
> +			break;
> +		default:
> +			if (old->refs[i].ptr != cur->refs[i].ptr)
> +				return false;
> +			break;

I'd future-proof it a bit by explicitly handling all enum values and
WARN_ONCE in default.

pw-bot: cr
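For the record, the future-proofed switch could look something like
this (a sketch, using the REF_TYPE_LOCK rename suggested above):

		switch (old->refs[i].type) {
		case REF_TYPE_PTR:
			if (old->refs[i].callback_ref != cur->refs[i].callback_ref)
				return false;
			break;
		case REF_TYPE_LOCK:
			if (old->refs[i].ptr != cur->refs[i].ptr)
				return false;
			break;
		default:
			/* Catch any enum value a future patch forgets to handle. */
			WARN_ONCE(1, "Unhandled reference state type: %d\n",
				  old->refs[i].type);
			return false;
		}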