Re: [PATCH bpf] bpf: Do not allocate percpu memory at init stage

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 11/10/2023 2:17 PM, Yonghong Song wrote:
> Kirill Shutemov reported significant percpu memory increase after booting
> in 288-cpu VM ([1]) due to commit 41a5db8d8161 ("bpf: Add support for
> non-fix-size percpu mem allocation"). The percpu memory is increased
> from 111MB to 969MB. The number is from /proc/meminfo.
>
> I tried to reproduce the issue with my local VM which at most supports
> upto 255 cpus. With 252 cpus, without the above commit, the percpu memory
> immediately after boot is 57MB while with the above commit the percpu
> memory is 231MB.
>
> This is not good since so far percpu memory from bpf memory allocator
> is not widely used yet. Let us change pre-allocation in init stage
> to on-demand allocation when verifier detects there is a need of
> percpu memory for bpf program. With this change, percpu memory
> consumption after boot can be reduced signicantly.
>
>   [1] https://lore.kernel.org/lkml/20231109154934.4saimljtqx625l3v@xxxxxxxxxxxxxxxxx/
>
> Fixes: 41a5db8d8161 ("bpf: Add support for non-fix-size percpu mem allocation")
> Cc: Kirill A. Shutemov <kirill@xxxxxxxxxxxxx>
> Signed-off-by: Yonghong Song <yonghong.song@xxxxxxxxx>
> ---
>  include/linux/bpf.h   |  2 +-
>  kernel/bpf/core.c     |  8 +++-----
>  kernel/bpf/verifier.c | 17 +++++++++++++++--
>  3 files changed, 19 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index b4825d3cdb29..3df67a04d32e 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -56,7 +56,7 @@ extern struct idr btf_idr;
>  extern spinlock_t btf_idr_lock;
>  extern struct kobject *btf_kobj;
>  extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
> -extern bool bpf_global_ma_set, bpf_global_percpu_ma_set;
> +extern bool bpf_global_ma_set;
>  
>  typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
>  typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 08626b519ce2..cd3afe57ece3 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -64,8 +64,8 @@
>  #define OFF	insn->off
>  #define IMM	insn->imm
>  
> -struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
> -bool bpf_global_ma_set, bpf_global_percpu_ma_set;
> +struct bpf_mem_alloc bpf_global_ma;
> +bool bpf_global_ma_set;
>  
>  /* No hurry in this branch
>   *
> @@ -2934,9 +2934,7 @@ static int __init bpf_global_ma_init(void)
>  
>  	ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
>  	bpf_global_ma_set = !ret;
> -	ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
> -	bpf_global_percpu_ma_set = !ret;
> -	return !bpf_global_ma_set || !bpf_global_percpu_ma_set;
> +	return ret;
>  }
>  late_initcall(bpf_global_ma_init);
>  #endif
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index bd1c42eb540f..7d485c8b794f 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -26,6 +26,7 @@
>  #include <linux/poison.h>
>  #include <linux/module.h>
>  #include <linux/cpumask.h>
> +#include <linux/bpf_mem_alloc.h>
>  #include <net/xdp.h>
>  
>  #include "disasm.h"
> @@ -41,6 +42,9 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
>  #undef BPF_LINK_TYPE
>  };
>  
> +struct bpf_mem_alloc bpf_global_percpu_ma;
> +static bool bpf_global_percpu_ma_set;
> +
>  /* bpf_check() is a static code analyzer that walks eBPF program
>   * instruction by instruction and updates register/stack state.
>   * All paths of conditional branches are analyzed until 'bpf_exit' insn.
> @@ -12074,8 +12078,17 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  				if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
>  					return -ENOMEM;
>  
> -				if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set)
> -					return -ENOMEM;
> +				if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
> +					mutex_lock(&bpf_verifier_lock);

Instead of acquiring the global lock each time, can we test whether or
bpf_global_percpu_ma_set is set before acquiring the global lock ?
> +					if (!bpf_global_percpu_ma_set) {
> +						err = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
> +						if (!err)
> +							bpf_global_percpu_ma_set = true;
> +					}
> +					mutex_unlock(&bpf_verifier_lock);
> +					if (err)
> +						return err;
> +				}
>  
>  				if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
>  					verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux