On Mon, 4 Nov 2024 at 13:33, Alexei Starovoitov <alexei.starovoitov@xxxxxxxxx> wrote:
>
> On Mon, Nov 4, 2024 at 7:17 AM Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx> wrote:
> >
> > When bpf_spin_lock was introduced originally, there was deliberation on
> > whether to use an array of lock IDs, but since bpf_spin_lock is limited
> > to holding a single lock at any given time, we've been using a single ID
> > to identify the held lock.
> >
> > In preparation for introducing spin locks that can be taken multiple
> > times, introduce support for acquiring multiple lock IDs. For this
> > purpose, reuse the acquired_refs array and store both lock and pointer
> > references. We tag the entry with REF_TYPE_PTR or REF_TYPE_BPF_LOCK to
> > disambiguate and find the relevant entry. The ptr field is used to track
> > the map_ptr or btf (for bpf_obj_new allocations) to ensure locks can be
> > matched with protected fields within the same "allocation", i.e.
> > bpf_obj_new object or map value.
> >
> > The struct active_lock is changed to a boolean as the state is part of
> > the acquired_refs array, and we only need active_lock as a cheap way
> > of detecting lock presence.
> >
> > Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx>
> > ---
> > Changelog:
> > v2 -> v3
> > v2: https://lore.kernel.org/bpf/20241103212252.547071-1-memxor@xxxxxxxxx
> >
> >  * Rebase on bpf-next to resolve merge conflict
> >
> > v1 -> v2
> > v1: https://lore.kernel.org/bpf/20241103205856.345580-1-memxor@xxxxxxxxx
> >
> >  * Fix refsafe state comparison to check callback_ref and ptr separately.
> > ---
> >  include/linux/bpf_verifier.h |  34 ++++++---
> >  kernel/bpf/verifier.c        | 138 ++++++++++++++++++++++++++---------
> >  2 files changed, 126 insertions(+), 46 deletions(-)
> >
> > diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> > index 4513372c5bc8..1e7e1803d78b 100644
> > --- a/include/linux/bpf_verifier.h
> > +++ b/include/linux/bpf_verifier.h
> > @@ -266,6 +266,10 @@ struct bpf_stack_state {
> >  };
> >
> >  struct bpf_reference_state {
> > +        /* Each reference object has a type. Ensure REF_TYPE_PTR is zero to
> > +         * default to pointer reference on zero initialization of a state.
> > +         */
> > +        enum { REF_TYPE_PTR = 0, REF_TYPE_BPF_LOCK } type;
>
> In the future commit two more values will be added, right?
> And it may push it over line limit.
> I think cleaner to do each value on separate line:
> enum ref_state_type {
>     REF_TYPE_PTR = 0,
>     REF_TYPE_BPF_LOCK
> } type;
>
> easier to read and less churn.
> Also name that enum type to use later.
>
> Also I'd drop _BPF_ from the middle.
> Just REF_TYPE_LOCK would do.

We're going to have a different lock type though. But I can rename it
when I add it later.

> >          /* Track each reference created with a unique id, even if the same
> >           * instruction creates the reference multiple times (eg, via CALL).
> >           */
> > @@ -274,17 +278,23 @@ struct bpf_reference_state {
> >           * is used purely to inform the user of a reference leak.
> >           */
> >          int insn_idx;
> > -        /* There can be a case like:
> > -         * main (frame 0)
> > -         *  cb (frame 1)
> > -         *   func (frame 3)
> > -         *    cb (frame 4)
> > -         * Hence for frame 4, if callback_ref just stored boolean, it would be
> > -         * impossible to distinguish nested callback refs. Hence store the
> > -         * frameno and compare that to callback_ref in check_reference_leak when
> > -         * exiting a callback function.
> > -         */
> > -        int callback_ref;
> > +        union {
> > +                /* There can be a case like:
> > +                 * main (frame 0)
> > +                 *  cb (frame 1)
> > +                 *   func (frame 3)
> > +                 *    cb (frame 4)
> > +                 * Hence for frame 4, if callback_ref just stored boolean, it would be
> > +                 * impossible to distinguish nested callback refs. Hence store the
> > +                 * frameno and compare that to callback_ref in check_reference_leak when
> > +                 * exiting a callback function.
> > +                 */
> > +                int callback_ref;
> > +                /* Use to keep track of the source object of a lock, to ensure
> > +                 * it matches on unlock.
> > +                 */
> > +                void *ptr;
> > +        };
> >  };
> >
> >  struct bpf_retval_range {
> > @@ -434,7 +444,7 @@ struct bpf_verifier_state {
> >          u32 insn_idx;
> >          u32 curframe;
> >
> > -        struct bpf_active_lock active_lock;
>
> remove struct bpf_active_lock as well.

Ack.

> > +        bool active_lock;
>
> In the next patch it becomes 'int',
> so let's make it 'int' right away and move it to bpf_func_state
> next to:
>   int acquired_refs;
>   struct bpf_reference_state *refs;
>
> since it's counting locks in this array.
> 'bool' is just a weird form of 1 or 0.
> So 'int' is cleaner in this patch too.

Ack.

> >          bool speculative;
> >          bool active_rcu_lock;
> >          u32 active_preempt_lock;
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index ba800c7611e3..ea8ad320e6cc 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -1335,6 +1335,7 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
> >          if (err)
> >                  return err;
> >          id = ++env->id_gen;
> > +        state->refs[new_ofs].type = REF_TYPE_PTR;
> >          state->refs[new_ofs].id = id;
> >          state->refs[new_ofs].insn_idx = insn_idx;
> >          state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0;
> > @@ -1342,6 +1343,23 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
> >          return id;
> >  }
> >
> > +static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, int type, int id, void *ptr)
>
> enum ref_state_type instead of 'int type' ?

Ack, but eventually it may require passing OR of flags, which I believe
makes the compiler unhappy. I can make that change later though.

> > +{
> > +        struct bpf_func_state *state = cur_func(env);
> > +        int new_ofs = state->acquired_refs;
> > +        int err;
> > +
> > +        err = resize_reference_state(state, state->acquired_refs + 1);
> > +        if (err)
> > +                return err;
> > +        state->refs[new_ofs].type = type;
> > +        state->refs[new_ofs].id = id;
> > +        state->refs[new_ofs].insn_idx = insn_idx;
> > +        state->refs[new_ofs].ptr = ptr;
> > +
> > +        return 0;
> > +}
> > +
> >  /* release function corresponding to acquire_reference_state(). Idempotent. */
> >  static int release_reference_state(struct bpf_func_state *state, int ptr_id)
> >  {
> > @@ -1349,6 +1367,8 @@ static int release_reference_state(struct bpf_func_state *state, int ptr_id)
> >
> >          last_idx = state->acquired_refs - 1;
> >          for (i = 0; i < state->acquired_refs; i++) {
> > +                if (state->refs[i].type != REF_TYPE_PTR)
> > +                        continue;
> >                  if (state->refs[i].id == ptr_id) {
> >                          /* Cannot release caller references in callbacks */
> >                          if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
> > @@ -1364,6 +1384,43 @@ static int release_reference_state(struct bpf_func_state *state, int ptr_id)
> >          return -EINVAL;
> >  }
> >
> > +static int release_lock_state(struct bpf_func_state *state, int type, int id, void *ptr)
> > +{
> > +        int i, last_idx;
> > +
> > +        last_idx = state->acquired_refs - 1;
> > +        for (i = 0; i < state->acquired_refs; i++) {
> > +                if (state->refs[i].type != type)
> > +                        continue;
> > +                if (state->refs[i].id == id && state->refs[i].ptr == ptr) {
> > +                        if (last_idx && i != last_idx)
> > +                                memcpy(&state->refs[i], &state->refs[last_idx],
> > +                                       sizeof(*state->refs));
> > +                        memset(&state->refs[last_idx], 0, sizeof(*state->refs));
> > +                        state->acquired_refs--;
> > +                        return 0;
> > +                }
> > +        }
> > +        return -EINVAL;
> > +}
> > +
> > +static struct bpf_reference_state *find_lock_state(struct bpf_verifier_env *env, int id, void *ptr)
> > +{
> > +        struct bpf_func_state *state = cur_func(env);
> > +        int i;
> > +
> > +        for (i = 0; i < state->acquired_refs; i++) {
> > +                struct bpf_reference_state *s = &state->refs[i];
> > +
> > +                if (s->type == REF_TYPE_PTR)
> > +                        continue;
>
> Let's pass 'enum ref_state_type type' and compare here
> to make it similar to release_lock_state().
> I think it will clean up future patches too.

Ok.

> > +
> > +                if (s->id == id && s->ptr == ptr)
> > +                        return s;
> > +        }
> > +        return NULL;
> > +}
> > +
> >  static void free_func_state(struct bpf_func_state *state)
> >  {
> >          if (!state)
> > @@ -1430,12 +1487,11 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
> >                  dst_state->frame[i] = NULL;
> >          }
> >          dst_state->speculative = src->speculative;
> > +        dst_state->active_lock = src->active_lock;
> >          dst_state->active_rcu_lock = src->active_rcu_lock;
> >          dst_state->active_preempt_lock = src->active_preempt_lock;
> >          dst_state->in_sleepable = src->in_sleepable;
> >          dst_state->curframe = src->curframe;
> > -        dst_state->active_lock.ptr = src->active_lock.ptr;
> > -        dst_state->active_lock.id = src->active_lock.id;
> >          dst_state->branches = src->branches;
> >          dst_state->parent = src->parent;
> >          dst_state->first_insn_idx = src->first_insn_idx;
> > @@ -5423,7 +5479,7 @@ static bool in_sleepable(struct bpf_verifier_env *env)
> >  static bool in_rcu_cs(struct bpf_verifier_env *env)
> >  {
> >          return env->cur_state->active_rcu_lock ||
> > -               env->cur_state->active_lock.ptr ||
> > +               env->cur_state->active_lock ||
> >                 !in_sleepable(env);
> >  }
> >
> > @@ -7698,6 +7754,7 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
> >          struct bpf_map *map = NULL;
> >          struct btf *btf = NULL;
> >          struct btf_record *rec;
> > +        int err;
> >
> >          if (!is_const) {
> >                  verbose(env,
> > @@ -7729,16 +7786,27 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
> >                  return -EINVAL;
> >          }
> >          if (is_lock) {
> > -                if (cur->active_lock.ptr) {
> > +                void *ptr;
> > +
> > +                if (map)
> > +                        ptr = map;
> > +                else
> > +                        ptr = btf;
> > +
> > +                if (cur->active_lock) {
> >                          verbose(env,
> >                                  "Locking two bpf_spin_locks are not allowed\n");
> >                          return -EINVAL;
> >                  }
> > -                if (map)
> > -                        cur->active_lock.ptr = map;
> > -                else
> > -                        cur->active_lock.ptr = btf;
> > -                cur->active_lock.id = reg->id;
> > +                err = acquire_lock_state(env, env->insn_idx, REF_TYPE_BPF_LOCK, reg->id, ptr);
> > +                if (err < 0) {
> > +                        verbose(env, "Failed to acquire lock state\n");
> > +                        return err;
> > +                }
> > +                /* It is not safe to allow multiple bpf_spin_lock calls, so
> > +                 * disallow them until this lock has been unlocked.
> > +                 */
> > +                cur->active_lock = true;
> >          } else {
> >                  void *ptr;
> >
> > @@ -7747,20 +7815,18 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
> >                  else
> >                          ptr = btf;
> >
> > -                if (!cur->active_lock.ptr) {
> > +                if (!cur->active_lock) {
> >                          verbose(env, "bpf_spin_unlock without taking a lock\n");
> >                          return -EINVAL;
> >                  }
> > -                if (cur->active_lock.ptr != ptr ||
> > -                    cur->active_lock.id != reg->id) {
> > +
> > +                if (release_lock_state(cur_func(env), REF_TYPE_BPF_LOCK, reg->id, ptr)) {
> >                          verbose(env, "bpf_spin_unlock of different lock\n");
> >                          return -EINVAL;
> >                  }
> >
> >                  invalidate_non_owning_refs(env);
> > -
> > -                cur->active_lock.ptr = NULL;
> > -                cur->active_lock.id = 0;
> > +                cur->active_lock = false;
> >          }
> >          return 0;
> >  }
> > @@ -9818,7 +9884,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> >                  const char *sub_name = subprog_name(env, subprog);
> >
> >                  /* Only global subprogs cannot be called with a lock held. */
> > -                if (env->cur_state->active_lock.ptr) {
> > +                if (env->cur_state->active_lock) {
> >                          verbose(env, "global function calls are not allowed while holding a lock,\n"
> >                                       "use static function instead\n");
> >                          return -EINVAL;
> > @@ -10343,6 +10409,8 @@ static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exi
> >                  return 0;
> >
> >          for (i = 0; i < state->acquired_refs; i++) {
> > +                if (state->refs[i].type != REF_TYPE_PTR)
> > +                        continue;
>
> why ?
> check_reference_leak() should complain about unreleased locks too.

I'm trying to change that here. Locks will be checked with the
active_locks counter everywhere, reference types are separate, and
check_reference_leak only reports REF_TYPE_PTR entries. Now that we've
unified everything in check_resource_leak, it should not be problematic.

> >                  if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
> >                          continue;
> >                  verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
> > @@ -10356,7 +10424,7 @@ static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit
> >  {
> >          int err;
> >
> > -        if (check_lock && env->cur_state->active_lock.ptr) {
> > +        if (check_lock && env->cur_state->active_lock) {
> >                  verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix);
> >                  return -EINVAL;
> >          }
> > @@ -11580,7 +11648,7 @@ static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state
> >          struct bpf_verifier_state *state = env->cur_state;
> >          struct btf_record *rec = reg_btf_record(reg);
> >
> > -        if (!state->active_lock.ptr) {
> > +        if (!state->active_lock) {
> >                  verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
> >                  return -EFAULT;
> >          }
> > @@ -11677,6 +11745,7 @@ static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_o
> >   */
> >  static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
> >  {
> > +        struct bpf_reference_state *s;
> >          void *ptr;
> >          u32 id;
> >
> > @@ -11693,10 +11762,10 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_
> >          }
> >          id = reg->id;
> >
> > -        if (!env->cur_state->active_lock.ptr)
> > +        if (!env->cur_state->active_lock)
> >                  return -EINVAL;
> > -        if (env->cur_state->active_lock.ptr != ptr ||
> > -            env->cur_state->active_lock.id != id) {
> > +        s = find_lock_state(env, id, ptr);
> > +        if (!s) {
> >                  verbose(env, "held lock and object are not in the same allocation\n");
> >                  return -EINVAL;
> >          }
> > @@ -17561,8 +17630,19 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
> >                  return false;
> >
> >          for (i = 0; i < old->acquired_refs; i++) {
> > -                if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap))
> > +                if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) ||
> > +                    old->refs[i].type != cur->refs[i].type)
> >                          return false;
> > +                switch (old->refs[i].type) {
> > +                case REF_TYPE_PTR:
> > +                        if (old->refs[i].callback_ref != cur->refs[i].callback_ref)
> > +                                return false;
> > +                        break;
> > +                default:
> > +                        if (old->refs[i].ptr != cur->refs[i].ptr)
> > +                                return false;
> > +                        break;
>
> I'd future proof a bit by explicitly handling all enum values
> and WARN_ONCE in default.

Ok.

pw-bot: cr