On 11/10/2023 2:17 PM, Yonghong Song wrote: > Kirill Shutemov reported significant percpu memory increase after booting > in 288-cpu VM ([1]) due to commit 41a5db8d8161 ("bpf: Add support for > non-fix-size percpu mem allocation"). The percpu memory is increased > from 111MB to 969MB. The number is from /proc/meminfo. > > I tried to reproduce the issue with my local VM which at most supports > upto 255 cpus. With 252 cpus, without the above commit, the percpu memory > immediately after boot is 57MB while with the above commit the percpu > memory is 231MB. > > This is not good since so far percpu memory from bpf memory allocator > is not widely used yet. Let us change pre-allocation in init stage > to on-demand allocation when verifier detects there is a need of > percpu memory for bpf program. With this change, percpu memory > consumption after boot can be reduced signicantly. > > [1] https://lore.kernel.org/lkml/20231109154934.4saimljtqx625l3v@xxxxxxxxxxxxxxxxx/ > > Fixes: 41a5db8d8161 ("bpf: Add support for non-fix-size percpu mem allocation") > Cc: Kirill A. Shutemov <kirill@xxxxxxxxxxxxx> > Signed-off-by: Yonghong Song <yonghong.song@xxxxxxxxx> > --- > include/linux/bpf.h | 2 +- > kernel/bpf/core.c | 8 +++----- > kernel/bpf/verifier.c | 17 +++++++++++++++-- > 3 files changed, 19 insertions(+), 8 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index b4825d3cdb29..3df67a04d32e 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -56,7 +56,7 @@ extern struct idr btf_idr; > extern spinlock_t btf_idr_lock; > extern struct kobject *btf_kobj; > extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; > -extern bool bpf_global_ma_set, bpf_global_percpu_ma_set; > +extern bool bpf_global_ma_set; > > typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); > typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c > index 08626b519ce2..cd3afe57ece3 100644 > --- a/kernel/bpf/core.c > +++ b/kernel/bpf/core.c > @@ -64,8 +64,8 @@ > #define OFF insn->off > #define IMM insn->imm > > -struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; > -bool bpf_global_ma_set, bpf_global_percpu_ma_set; > +struct bpf_mem_alloc bpf_global_ma; > +bool bpf_global_ma_set; > > /* No hurry in this branch > * > @@ -2934,9 +2934,7 @@ static int __init bpf_global_ma_init(void) > > ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false); > bpf_global_ma_set = !ret; > - ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true); > - bpf_global_percpu_ma_set = !ret; > - return !bpf_global_ma_set || !bpf_global_percpu_ma_set; > + return ret; > } > late_initcall(bpf_global_ma_init); > #endif > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index bd1c42eb540f..7d485c8b794f 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -26,6 +26,7 @@ > #include <linux/poison.h> > #include <linux/module.h> > #include <linux/cpumask.h> > +#include <linux/bpf_mem_alloc.h> > #include <net/xdp.h> > > #include "disasm.h" > @@ -41,6 +42,9 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { > #undef BPF_LINK_TYPE > }; > > +struct bpf_mem_alloc bpf_global_percpu_ma; > +static bool bpf_global_percpu_ma_set; > + > /* bpf_check() is a static code analyzer that walks eBPF program > * instruction by instruction and updates register/stack state. > * All paths of conditional branches are analyzed until 'bpf_exit' insn. > @@ -12074,8 +12078,17 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, > if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set) > return -ENOMEM; > > - if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set) > - return -ENOMEM; > + if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) { > + mutex_lock(&bpf_verifier_lock); Instead of acquiring the global lock each time, can we test whether or bpf_global_percpu_ma_set is set before acquiring the global lock ? > + if (!bpf_global_percpu_ma_set) { > + err = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true); > + if (!err) > + bpf_global_percpu_ma_set = true; > + } > + mutex_unlock(&bpf_verifier_lock); > + if (err) > + return err; > + } > > if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) { > verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");