Commit ef01f4e25c17 ("bpf: restore the ebpf program ID for BPF_AUDIT_UNLOAD and PERF_BPF_EVENT_PROG_UNLOAD") stopped removing program's id from idr when the offloaded/bound netdev goes away. I was supposed to take a look and check in [0], but apparently I did not. The purpose of idr removal is to avoid BPF_PROG_GET_NEXT_ID returning stale ids for the programs that have a dead netdev. This functionality is verified by test_offload.py, but we don't run this test in the CI. Introduce new bpf_prog_remove_from_idr which takes care of correctly dealing with potential double idr_remove() via separate skip_idr_remove flag in the aux. Verified by running the test manually: test_offload.py: OK 0: https://lore.kernel.org/all/CAKH8qBtyR20ZWAc11z1-6pGb3Hd47AQUTbE_cfoktG59TqaJ7Q@xxxxxxxxxxxxxx/ Fixes: ef01f4e25c17 ("bpf: restore the ebpf program ID for BPF_AUDIT_UNLOAD and PERF_BPF_EVENT_PROG_UNLOAD") Signed-off-by: Stanislav Fomichev <sdf@xxxxxxxxxx> --- include/linux/bpf.h | 2 ++ kernel/bpf/offload.c | 3 +++ kernel/bpf/syscall.c | 15 +++++++++++---- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4001d11be151..d2aa4b59bf1e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1414,6 +1414,7 @@ struct bpf_prog_aux { bool xdp_has_frags; bool exception_cb; bool exception_boundary; + bool skip_idr_remove; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -2049,6 +2050,7 @@ void bpf_prog_inc(struct bpf_prog *prog); struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); +void bpf_prog_remove_from_idr(struct bpf_prog *prog); void bpf_prog_free_id(struct bpf_prog *prog); void bpf_map_free_id(struct bpf_map *map); diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 1a4fec330eaa..6f4fe492ee2a 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -112,6 +112,9 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog) if (offload->dev_state) offload->offdev->ops->destroy(prog); + /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */ + bpf_prog_remove_from_idr(prog); + list_del_init(&offload->offloads); kfree(offload); prog->aux->offload = NULL; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0ed286b8a0f0..bc813e03e2cf 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2083,10 +2083,19 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog) return id > 0 ? 0 : id; } -void bpf_prog_free_id(struct bpf_prog *prog) +void bpf_prog_remove_from_idr(struct bpf_prog *prog) { unsigned long flags; + spin_lock_irqsave(&prog_idr_lock, flags); + if (!prog->aux->skip_idr_remove) + idr_remove(&prog_idr, prog->aux->id); + prog->aux->skip_idr_remove = 1; + spin_unlock_irqrestore(&prog_idr_lock, flags); +} + +void bpf_prog_free_id(struct bpf_prog *prog) +{ /* cBPF to eBPF migrations are currently not in the idr store. * Offloaded programs are removed from the store when their device * disappears - even if someone grabs an fd to them they are unusable, @@ -2095,10 +2104,8 @@ void bpf_prog_free_id(struct bpf_prog *prog) if (!prog->aux->id) return; - spin_lock_irqsave(&prog_idr_lock, flags); - idr_remove(&prog_idr, prog->aux->id); + bpf_prog_remove_from_idr(prog); prog->aux->id = 0; - spin_unlock_irqrestore(&prog_idr_lock, flags); } static void __bpf_prog_put_rcu(struct rcu_head *rcu) -- 2.42.0.869.gea05f2083d-goog