On 11/13, Stanislav Fomichev wrote: > Commit ef01f4e25c17 ("bpf: restore the ebpf program ID for BPF_AUDIT_UNLOAD > and PERF_BPF_EVENT_PROG_UNLOAD") stopped removing program's id from > idr when the offloaded/bound netdev goes away. I was supposed to > take a look and check in [0], but apparently I did not. > > The purpose of idr removal is to avoid BPF_PROG_GET_NEXT_ID returning > stale ids for the programs that have a dead netdev. This functionality > is verified by test_offload.py, but we don't run this test in the CI. > > Introduce new bpf_prog_remove_from_idr which takes care of correctly > dealing with potential double idr_remove() via separate skip_idr_remove > flag in the aux. > > Verified by running the test manually: > test_offload.py: OK > > 0: https://lore.kernel.org/all/CAKH8qBtyR20ZWAc11z1-6pGb3Hd47AQUTbE_cfoktG59TqaJ7Q@xxxxxxxxxxxxxx/ > > Fixes: ef01f4e25c17 ("bpf: restore the ebpf program ID for BPF_AUDIT_UNLOAD and PERF_BPF_EVENT_PROG_UNLOAD") > Signed-off-by: Stanislav Fomichev <sdf@xxxxxxxxxx> > --- > include/linux/bpf.h | 2 ++ > kernel/bpf/offload.c | 3 +++ > kernel/bpf/syscall.c | 15 +++++++++++---- > 3 files changed, 16 insertions(+), 4 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 4001d11be151..d2aa4b59bf1e 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -1414,6 +1414,7 @@ struct bpf_prog_aux { > bool xdp_has_frags; > bool exception_cb; > bool exception_boundary; > + bool skip_idr_remove; > /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ > const struct btf_type *attach_func_proto; > /* function name for valid attach_btf_id */ > @@ -2049,6 +2050,7 @@ void bpf_prog_inc(struct bpf_prog *prog); > struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); > void bpf_prog_put(struct bpf_prog *prog); > > +void bpf_prog_remove_from_idr(struct bpf_prog *prog); > void bpf_prog_free_id(struct bpf_prog *prog); > void bpf_map_free_id(struct bpf_map *map); > > diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c > index 1a4fec330eaa..6f4fe492ee2a 100644 > --- a/kernel/bpf/offload.c > +++ b/kernel/bpf/offload.c > @@ -112,6 +112,9 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog) > if (offload->dev_state) > offload->offdev->ops->destroy(prog); > > + /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */ > + bpf_prog_remove_from_idr(prog); > + > list_del_init(&offload->offloads); > kfree(offload); > prog->aux->offload = NULL; > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 0ed286b8a0f0..bc813e03e2cf 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -2083,10 +2083,19 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog) > return id > 0 ? 0 : id; > } > > -void bpf_prog_free_id(struct bpf_prog *prog) > +void bpf_prog_remove_from_idr(struct bpf_prog *prog) > { > unsigned long flags; > > + spin_lock_irqsave(&prog_idr_lock, flags); > + if (!prog->aux->skip_idr_remove) > + idr_remove(&prog_idr, prog->aux->id); > + prog->aux->skip_idr_remove = 1; > + spin_unlock_irqrestore(&prog_idr_lock, flags); > +} > + > +void bpf_prog_free_id(struct bpf_prog *prog) > +{ > /* cBPF to eBPF migrations are currently not in the idr store. > * Offloaded programs are removed from the store when their device > * disappears - even if someone grabs an fd to them they are unusable, > @@ -2095,10 +2104,8 @@ void bpf_prog_free_id(struct bpf_prog *prog) > if (!prog->aux->id) > return; > > - spin_lock_irqsave(&prog_idr_lock, flags); > - idr_remove(&prog_idr, prog->aux->id); > + bpf_prog_remove_from_idr(prog); > prog->aux->id = 0; > - spin_unlock_irqrestore(&prog_idr_lock, flags); > } > > static void __bpf_prog_put_rcu(struct rcu_head *rcu) > -- > 2.42.0.869.gea05f2083d-goog > Same here, should have been CC'ed to netdev.