From: Hou Tao <houtao1@xxxxxxxxxx> The following warning was reported when running "./test_progs -t test_bpf_ma/percpu_free_through_map_free": ------------[ cut here ]------------ WARNING: CPU: 1 PID: 68 at kernel/bpf/memalloc.c:342 CPU: 1 PID: 68 Comm: kworker/u16:2 Not tainted 6.6.0-rc2+ #222 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) Workqueue: events_unbound bpf_map_free_deferred RIP: 0010:bpf_mem_refill+0x21c/0x2a0 ...... Call Trace: <IRQ> ? bpf_mem_refill+0x21c/0x2a0 irq_work_single+0x27/0x70 irq_work_run_list+0x2a/0x40 irq_work_run+0x18/0x40 __sysvec_irq_work+0x1c/0xc0 sysvec_irq_work+0x73/0x90 </IRQ> <TASK> asm_sysvec_irq_work+0x1b/0x20 RIP: 0010:unit_free+0x50/0x80 ...... bpf_mem_free+0x46/0x60 __bpf_obj_drop_impl+0x40/0x90 bpf_obj_free_fields+0x17d/0x1a0 array_map_free+0x6b/0x170 bpf_map_free_deferred+0x54/0xa0 process_scheduled_works+0xba/0x370 worker_thread+0x16d/0x2e0 kthread+0x105/0x140 ret_from_fork+0x39/0x60 ret_from_fork_asm+0x1b/0x30 </TASK> ---[ end trace 0000000000000000 ]--- The reason is simple: __bpf_obj_drop_impl() does not know the freeing field is a per-cpu pointer and it uses bpf_global_ma to free the pointer. Because bpf_global_ma is not a per-cpu allocator, so ksize() is used to select the corresponding cache. The bpf_mem_cache with 16-bytes unit_size will always be selected to do the unmatched free and it will trigger the warning in free_bulk() eventually. Because per-cpu kptr doesn't support list or rb-tree now, so fix the problem by only checking whether or not the type of kptr is per-cpu in bpf_obj_free_fields(), and using bpf_global_percpu_ma to these kptrs. Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx> --- kernel/bpf/helpers.c | 22 ++++++++++++++-------- kernel/bpf/internal.h | 2 +- kernel/bpf/syscall.c | 4 ++-- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 07f49f8831c0..078217c921e8 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1840,7 +1840,7 @@ void bpf_list_head_free(const struct btf_field *field, void *list_head, * bpf_list_head which needs to be freed. */ migrate_disable(); - __bpf_obj_drop_impl(obj, field->graph_root.value_rec); + __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false); migrate_enable(); } } @@ -1879,7 +1879,7 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root, migrate_disable(); - __bpf_obj_drop_impl(obj, field->graph_root.value_rec); + __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false); migrate_enable(); } } @@ -1911,8 +1911,10 @@ __bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign) } /* Must be called under migrate_disable(), as required by bpf_mem_free */ -void __bpf_obj_drop_impl(void *p, const struct btf_record *rec) +void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu) { + struct bpf_mem_alloc *ma; + if (rec && rec->refcount_off >= 0 && !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) { /* Object is refcounted and refcount_dec didn't result in 0 @@ -1924,10 +1926,14 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec) if (rec) bpf_obj_free_fields(rec, p); + if (percpu) + ma = &bpf_global_percpu_ma; + else + ma = &bpf_global_ma; if (rec && rec->refcount_off >= 0) - bpf_mem_free_rcu(&bpf_global_ma, p); + bpf_mem_free_rcu(ma, p); else - bpf_mem_free(&bpf_global_ma, p); + bpf_mem_free(ma, p); } __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) @@ -1935,7 +1941,7 @@ __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) struct btf_struct_meta *meta = meta__ign; void *p = p__alloc; - __bpf_obj_drop_impl(p, meta ? meta->record : NULL); + __bpf_obj_drop_impl(p, meta ? meta->record : NULL, false); } __bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign) @@ -1979,7 +1985,7 @@ static int __bpf_list_add(struct bpf_list_node_kern *node, */ if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) { /* Only called from BPF prog, no need to migrate_disable */ - __bpf_obj_drop_impl((void *)n - off, rec); + __bpf_obj_drop_impl((void *)n - off, rec, false); return -EINVAL; } @@ -2078,7 +2084,7 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root, */ if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) { /* Only called from BPF prog, no need to migrate_disable */ - __bpf_obj_drop_impl((void *)n - off, rec); + __bpf_obj_drop_impl((void *)n - off, rec, false); return -EINVAL; } diff --git a/kernel/bpf/internal.h b/kernel/bpf/internal.h index e233ea83eb0a..4c3cfdd6e6a2 100644 --- a/kernel/bpf/internal.h +++ b/kernel/bpf/internal.h @@ -6,6 +6,6 @@ struct btf_record; -void __bpf_obj_drop_impl(void *p, const struct btf_record *rec); +void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); #endif /* __BPF_INTERNAL_H_ */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7de4b9f97c8f..8dfc5d39c91d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -662,8 +662,8 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) field->kptr.btf_id); migrate_disable(); __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? - pointee_struct_meta->record : - NULL); + pointee_struct_meta->record : NULL, + fields[i].type == BPF_KPTR_PERCPU); migrate_enable(); } else { field->kptr.dtor(xchgd_field); -- 2.29.2