On Mon, Nov 4, 2024 at 7:17 AM Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx> wrote:
>
> When bpf_spin_lock was introduced originally, there was deliberation on
> whether to use an array of lock IDs, but since bpf_spin_lock is limited
> to holding a single lock at any given time, we've been using a single ID
> to identify the held lock.
>
> In preparation for introducing spin locks that can be taken multiple
> times, introduce support for acquiring multiple lock IDs. For this
> purpose, reuse the acquired_refs array and store both lock and pointer
> references. We tag the entry with REF_TYPE_PTR or REF_TYPE_BPF_LOCK to
> disambiguate and find the relevant entry. The ptr field is used to track
> the map_ptr or btf (for bpf_obj_new allocations) to ensure locks can be
> matched with protected fields within the same "allocation", i.e.
> bpf_obj_new object or map value.
>
> The struct active_lock is changed to a boolean as the state is part of
> the acquired_refs array, and we only need active_lock as a cheap way
> of detecting lock presence.
>
> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx>
> ---
> Changelog:
> v2 -> v3
> v2: https://lore.kernel.org/bpf/20241103212252.547071-1-memxor@xxxxxxxxx
>
> * Rebase on bpf-next to resolve merge conflict
>
> v1 -> v2
> v1: https://lore.kernel.org/bpf/20241103205856.345580-1-memxor@xxxxxxxxx
>
> * Fix refsafe state comparison to check callback_ref and ptr separately.
> ---
>  include/linux/bpf_verifier.h |  34 ++++++---
>  kernel/bpf/verifier.c        | 138 ++++++++++++++++++++++++++---------
>  2 files changed, 126 insertions(+), 46 deletions(-)
>
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index 4513372c5bc8..1e7e1803d78b 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -266,6 +266,10 @@ struct bpf_stack_state {
>  };
>
>  struct bpf_reference_state {
> +	/* Each reference object has a type. Ensure REF_TYPE_PTR is zero to
> +	 * default to pointer reference on zero initialization of a state.
> +	 */
> +	enum { REF_TYPE_PTR = 0, REF_TYPE_BPF_LOCK } type;

In a future commit two more values will be added, right?
That may push this line over the length limit.
I think it's cleaner to put each value on a separate line:

enum ref_state_type {
	REF_TYPE_PTR = 0,
	REF_TYPE_BPF_LOCK
} type;

easier to read and less churn.
Also name that enum type so it can be used later.
Also I'd drop _BPF_ from the middle. Just REF_TYPE_LOCK would do.
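Concretely, a named enum could be declared once and reused by the new
helpers later in this patch. A sketch only, with the suggested
REF_TYPE_LOCK rename applied; the prototypes mirror the helpers added
below with 'int type' swapped for the enum:

enum ref_state_type {
	/* Keep REF_TYPE_PTR zero so a zero-initialized state defaults to it. */
	REF_TYPE_PTR = 0,
	REF_TYPE_LOCK,
};

/* This patch's helpers, retyped to take the named enum: */
static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx,
			      enum ref_state_type type, int id, void *ptr);
static int release_lock_state(struct bpf_func_state *state,
			      enum ref_state_type type, int id, void *ptr);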
>  	/* Track each reference created with a unique id, even if the same
>  	 * instruction creates the reference multiple times (eg, via CALL).
>  	 */
> @@ -274,17 +278,23 @@ struct bpf_reference_state {
>  	 * is used purely to inform the user of a reference leak.
>  	 */
>  	int insn_idx;
> -	/* There can be a case like:
> -	 * main (frame 0)
> -	 *  cb (frame 1)
> -	 *   func (frame 3)
> -	 *    cb (frame 4)
> -	 * Hence for frame 4, if callback_ref just stored boolean, it would be
> -	 * impossible to distinguish nested callback refs. Hence store the
> -	 * frameno and compare that to callback_ref in check_reference_leak when
> -	 * exiting a callback function.
> -	 */
> -	int callback_ref;
> +	union {
> +		/* There can be a case like:
> +		 * main (frame 0)
> +		 *  cb (frame 1)
> +		 *   func (frame 3)
> +		 *    cb (frame 4)
> +		 * Hence for frame 4, if callback_ref just stored boolean, it would be
> +		 * impossible to distinguish nested callback refs. Hence store the
> +		 * frameno and compare that to callback_ref in check_reference_leak when
> +		 * exiting a callback function.
> +		 */
> +		int callback_ref;
> +		/* Use to keep track of the source object of a lock, to ensure
> +		 * it matches on unlock.
> +		 */
> +		void *ptr;
> +	};
>  };
>
>  struct bpf_retval_range {
> @@ -434,7 +444,7 @@ struct bpf_verifier_state {
>  	u32 insn_idx;
>  	u32 curframe;
>
> -	struct bpf_active_lock active_lock;

remove struct bpf_active_lock as well.

> +	bool active_lock;

In the next patch it becomes 'int', so let's make it 'int' right away
and move it to bpf_func_state next to:

	int acquired_refs;
	struct bpf_reference_state *refs;

since it's counting locks in this array.
'bool' is just a weird form of 1 or 0, so 'int' is cleaner in this patch too.
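Under that suggestion the layout could end up roughly like this. A
sketch only; the field name active_locks and its comment are guesses,
only the placement next to acquired_refs/refs is from the review:

struct bpf_func_state {
	/* ... other fields unchanged ... */

	int acquired_refs;
	/* Counts REF_TYPE_LOCK entries in refs; replaces the bool in
	 * bpf_verifier_state and becomes a real counter in the next patch.
	 */
	int active_locks;
	struct bpf_reference_state *refs;
};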
>  	bool speculative;
>  	bool active_rcu_lock;
>  	u32 active_preempt_lock;
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index ba800c7611e3..ea8ad320e6cc 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -1335,6 +1335,7 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
>  	if (err)
>  		return err;
>  	id = ++env->id_gen;
> +	state->refs[new_ofs].type = REF_TYPE_PTR;
>  	state->refs[new_ofs].id = id;
>  	state->refs[new_ofs].insn_idx = insn_idx;
>  	state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0;
> @@ -1342,6 +1343,23 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
>  	return id;
>  }
>
> +static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, int type, int id, void *ptr)

enum ref_state_type instead of 'int type' ?

> +{
> +	struct bpf_func_state *state = cur_func(env);
> +	int new_ofs = state->acquired_refs;
> +	int err;
> +
> +	err = resize_reference_state(state, state->acquired_refs + 1);
> +	if (err)
> +		return err;
> +	state->refs[new_ofs].type = type;
> +	state->refs[new_ofs].id = id;
> +	state->refs[new_ofs].insn_idx = insn_idx;
> +	state->refs[new_ofs].ptr = ptr;
> +
> +	return 0;
> +}
> +
>  /* release function corresponding to acquire_reference_state(). Idempotent. */
>  static int release_reference_state(struct bpf_func_state *state, int ptr_id)
>  {
> @@ -1349,6 +1367,8 @@ static int release_reference_state(struct bpf_func_state *state, int ptr_id)
>
>  	last_idx = state->acquired_refs - 1;
>  	for (i = 0; i < state->acquired_refs; i++) {
> +		if (state->refs[i].type != REF_TYPE_PTR)
> +			continue;
>  		if (state->refs[i].id == ptr_id) {
>  			/* Cannot release caller references in callbacks */
>  			if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
> @@ -1364,6 +1384,43 @@ static int release_reference_state(struct bpf_func_state *state, int ptr_id)
>  	return -EINVAL;
>  }
>
> +static int release_lock_state(struct bpf_func_state *state, int type, int id, void *ptr)
> +{
> +	int i, last_idx;
> +
> +	last_idx = state->acquired_refs - 1;
> +	for (i = 0; i < state->acquired_refs; i++) {
> +		if (state->refs[i].type != type)
> +			continue;
> +		if (state->refs[i].id == id && state->refs[i].ptr == ptr) {
> +			if (last_idx && i != last_idx)
> +				memcpy(&state->refs[i], &state->refs[last_idx],
> +				       sizeof(*state->refs));
> +			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
> +			state->acquired_refs--;
> +			return 0;
> +		}
> +	}
> +	return -EINVAL;
> +}
> +
> +static struct bpf_reference_state *find_lock_state(struct bpf_verifier_env *env, int id, void *ptr)
> +{
> +	struct bpf_func_state *state = cur_func(env);
> +	int i;
> +
> +	for (i = 0; i < state->acquired_refs; i++) {
> +		struct bpf_reference_state *s = &state->refs[i];
> +
> +		if (s->type == REF_TYPE_PTR)
> +			continue;

Let's pass 'enum ref_state_type type' and compare here to make it
similar to release_lock_state(). I think it will clean up future
patches too.

> +
> +		if (s->id == id && s->ptr == ptr)
> +			return s;
> +	}
> +	return NULL;
> +}
> +
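Applying that suggestion, the helper could look like the following
(an untested sketch of the suggested shape):

static struct bpf_reference_state *find_lock_state(struct bpf_verifier_env *env,
						   enum ref_state_type type,
						   int id, void *ptr)
{
	struct bpf_func_state *state = cur_func(env);
	int i;

	for (i = 0; i < state->acquired_refs; i++) {
		struct bpf_reference_state *s = &state->refs[i];

		/* Match the requested type, mirroring release_lock_state(). */
		if (s->type != type)
			continue;
		if (s->id == id && s->ptr == ptr)
			return s;
	}
	return NULL;
}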
>  static void free_func_state(struct bpf_func_state *state)
>  {
>  	if (!state)
> @@ -1430,12 +1487,11 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
>  		dst_state->frame[i] = NULL;
>  	}
>  	dst_state->speculative = src->speculative;
> +	dst_state->active_lock = src->active_lock;
>  	dst_state->active_rcu_lock = src->active_rcu_lock;
>  	dst_state->active_preempt_lock = src->active_preempt_lock;
>  	dst_state->in_sleepable = src->in_sleepable;
>  	dst_state->curframe = src->curframe;
> -	dst_state->active_lock.ptr = src->active_lock.ptr;
> -	dst_state->active_lock.id = src->active_lock.id;
>  	dst_state->branches = src->branches;
>  	dst_state->parent = src->parent;
>  	dst_state->first_insn_idx = src->first_insn_idx;
> @@ -5423,7 +5479,7 @@ static bool in_sleepable(struct bpf_verifier_env *env)
>  static bool in_rcu_cs(struct bpf_verifier_env *env)
>  {
>  	return env->cur_state->active_rcu_lock ||
> -	       env->cur_state->active_lock.ptr ||
> +	       env->cur_state->active_lock ||
>  	       !in_sleepable(env);
>  }
>
> @@ -7698,6 +7754,7 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
>  	struct bpf_map *map = NULL;
>  	struct btf *btf = NULL;
>  	struct btf_record *rec;
> +	int err;
>
>  	if (!is_const) {
>  		verbose(env,
> @@ -7729,16 +7786,27 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
>  		return -EINVAL;
>  	}
>  	if (is_lock) {
> -		if (cur->active_lock.ptr) {
> +		void *ptr;
> +
> +		if (map)
> +			ptr = map;
> +		else
> +			ptr = btf;
> +
> +		if (cur->active_lock) {
>  			verbose(env,
>  				"Locking two bpf_spin_locks are not allowed\n");
>  			return -EINVAL;
>  		}
> -		if (map)
> -			cur->active_lock.ptr = map;
> -		else
> -			cur->active_lock.ptr = btf;
> -		cur->active_lock.id = reg->id;
> +		err = acquire_lock_state(env, env->insn_idx,
> +					 REF_TYPE_BPF_LOCK, reg->id, ptr);
> +		if (err < 0) {
> +			verbose(env, "Failed to acquire lock state\n");
> +			return err;
> +		}
> +		/* It is not safe to allow multiple bpf_spin_lock calls, so
> +		 * disallow them until this lock has been unlocked.
> +		 */
> +		cur->active_lock = true;
>  	} else {
>  		void *ptr;
>
> @@ -7747,20 +7815,18 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
>  		else
>  			ptr = btf;
>
> -		if (!cur->active_lock.ptr) {
> +		if (!cur->active_lock) {
>  			verbose(env, "bpf_spin_unlock without taking a lock\n");
>  			return -EINVAL;
>  		}
> -		if (cur->active_lock.ptr != ptr ||
> -		    cur->active_lock.id != reg->id) {
> +
> +		if (release_lock_state(cur_func(env), REF_TYPE_BPF_LOCK, reg->id, ptr)) {
>  			verbose(env, "bpf_spin_unlock of different lock\n");
>  			return -EINVAL;
>  		}
>
>  		invalidate_non_owning_refs(env);
> -
> -		cur->active_lock.ptr = NULL;
> -		cur->active_lock.id = 0;
> +		cur->active_lock = false;
>  	}
>  	return 0;
>  }
> @@ -9818,7 +9884,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  		const char *sub_name = subprog_name(env, subprog);
>
>  		/* Only global subprogs cannot be called with a lock held. */
> -		if (env->cur_state->active_lock.ptr) {
> +		if (env->cur_state->active_lock) {
>  			verbose(env, "global function calls are not allowed while holding a lock,\n"
>  				     "use static function instead\n");
>  			return -EINVAL;
> @@ -10343,6 +10409,8 @@ static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exi
>  		return 0;
>
>  	for (i = 0; i < state->acquired_refs; i++) {
> +		if (state->refs[i].type != REF_TYPE_PTR)
> +			continue;

why?
check_reference_leak() should complain about unreleased locks too.

>  		if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
>  			continue;
>  		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
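A sketch of how the loop could report held locks instead of skipping
them (refs_lingering and the lock message are assumptions about the
surrounding function body, which the quoted hunk elides):

	for (i = 0; i < state->acquired_refs; i++) {
		/* Lock entries keep the owner in the 'ptr' union member, so
		 * the callback_ref check below only applies to REF_TYPE_PTR.
		 */
		if (state->refs[i].type != REF_TYPE_PTR) {
			verbose(env, "Unreleased lock id=%d alloc_insn=%d\n",
				state->refs[i].id, state->refs[i].insn_idx);
			refs_lingering = true;
			continue;
		}
		if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
			continue;
		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
			state->refs[i].id, state->refs[i].insn_idx);
		refs_lingering = true;
	}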
> @@ -10356,7 +10424,7 @@ static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit
>  {
>  	int err;
>
> -	if (check_lock && env->cur_state->active_lock.ptr) {
> +	if (check_lock && env->cur_state->active_lock) {
>  		verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix);
>  		return -EINVAL;
>  	}
> @@ -11580,7 +11648,7 @@ static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state
>  	struct bpf_verifier_state *state = env->cur_state;
>  	struct btf_record *rec = reg_btf_record(reg);
>
> -	if (!state->active_lock.ptr) {
> +	if (!state->active_lock) {
>  		verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
>  		return -EFAULT;
>  	}
> @@ -11677,6 +11745,7 @@ static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_o
>   */
>  static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
>  {
> +	struct bpf_reference_state *s;
>  	void *ptr;
>  	u32 id;
>
> @@ -11693,10 +11762,10 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_
>  	}
>  	id = reg->id;
>
> -	if (!env->cur_state->active_lock.ptr)
> +	if (!env->cur_state->active_lock)
>  		return -EINVAL;
> -	if (env->cur_state->active_lock.ptr != ptr ||
> -	    env->cur_state->active_lock.id != id) {
> +	s = find_lock_state(env, id, ptr);
> +	if (!s) {
>  		verbose(env, "held lock and object are not in the same allocation\n");
>  		return -EINVAL;
>  	}
> @@ -17561,8 +17630,19 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
>  		return false;
>
>  	for (i = 0; i < old->acquired_refs; i++) {
> -		if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap))
> +		if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) ||
> +		    old->refs[i].type != cur->refs[i].type)
>  			return false;
> +		switch (old->refs[i].type) {
> +		case REF_TYPE_PTR:
> +			if (old->refs[i].callback_ref != cur->refs[i].callback_ref)
> +				return false;
> +			break;
> +		default:
> +			if (old->refs[i].ptr != cur->refs[i].ptr)
> +				return false;
> +			break;

I'd future-proof it a bit by explicitly handling all enum values and
WARN_ONCE in default.

pw-bot: cr
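For the record, the future-proofed switch could look something like
this (a sketch, using the REF_TYPE_LOCK rename suggested above):

		switch (old->refs[i].type) {
		case REF_TYPE_PTR:
			if (old->refs[i].callback_ref != cur->refs[i].callback_ref)
				return false;
			break;
		case REF_TYPE_LOCK:
			if (old->refs[i].ptr != cur->refs[i].ptr)
				return false;
			break;
		default:
			/* Catch any enum value a future patch forgets to handle. */
			WARN_ONCE(1, "Unhandled reference state type: %d\n",
				  old->refs[i].type);
			return false;
		}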