Re: [PATCH bpf-next v3] bpf: Do not allocate percpu memory at init stage

Hi,

On 11/11/2023 9:39 AM, Yonghong Song wrote:
> Kirill Shutemov reported a significant percpu memory consumption increase after
> booting a 288-cpu VM ([1]) due to commit 41a5db8d8161 ("bpf: Add support for
> non-fix-size percpu mem allocation"). The percpu memory consumption
> increased from 111MB to 969MB. The numbers are from /proc/meminfo.
>
> I tried to reproduce the issue with my local VM, which supports at most
> 255 cpus. With 252 cpus, the percpu memory consumption immediately after
> boot is 57MB without the above commit, while with the above commit the
> percpu memory consumption is 231MB.
>
> This is not good since percpu memory from the bpf memory allocator is not
> widely used yet. Let us change pre-allocation at the init stage to on-demand
> allocation when the verifier detects that a bpf program needs percpu memory.
> With this change, percpu memory consumption after boot can be reduced
> significantly.
>
>   [1] https://lore.kernel.org/lkml/20231109154934.4saimljtqx625l3v@xxxxxxxxxxxxxxxxx/
>
> Fixes: 41a5db8d8161 ("bpf: Add support for non-fix-size percpu mem allocation")
> Reported-and-tested-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> Signed-off-by: Yonghong Song <yonghong.song@xxxxxxxxx>
> ---
>  include/linux/bpf.h   |  2 +-
>  kernel/bpf/core.c     |  8 +++-----
>  kernel/bpf/verifier.c | 20 ++++++++++++++++++--
>  3 files changed, 22 insertions(+), 8 deletions(-)
>
> Changelog:
>   v2 -> v3:
>     - Use dedicated mutex lock (bpf_percpu_ma_lock)
>   v1 -> v2:
>     - Add proper Reported-and-tested-by tag.
>     - Do a check of !bpf_global_percpu_ma_set before acquiring verifier_lock.
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 35bff17396c0..6762dac3ef76 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -56,7 +56,7 @@ extern struct idr btf_idr;
>  extern spinlock_t btf_idr_lock;
>  extern struct kobject *btf_kobj;
>  extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
> -extern bool bpf_global_ma_set, bpf_global_percpu_ma_set;
> +extern bool bpf_global_ma_set;
>  
>  typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
>  typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 08626b519ce2..cd3afe57ece3 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -64,8 +64,8 @@
>  #define OFF	insn->off
>  #define IMM	insn->imm
>  
> -struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
> -bool bpf_global_ma_set, bpf_global_percpu_ma_set;
> +struct bpf_mem_alloc bpf_global_ma;
> +bool bpf_global_ma_set;
>  
>  /* No hurry in this branch
>   *
> @@ -2934,9 +2934,7 @@ static int __init bpf_global_ma_init(void)
>  
>  	ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
>  	bpf_global_ma_set = !ret;
> -	ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
> -	bpf_global_percpu_ma_set = !ret;
> -	return !bpf_global_ma_set || !bpf_global_percpu_ma_set;
> +	return ret;
>  }
>  late_initcall(bpf_global_ma_init);
>  #endif
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index a2267d5ed14e..6da370a047fe 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -26,6 +26,7 @@
>  #include <linux/poison.h>
>  #include <linux/module.h>
>  #include <linux/cpumask.h>
> +#include <linux/bpf_mem_alloc.h>
>  #include <net/xdp.h>
>  
>  #include "disasm.h"
> @@ -41,6 +42,9 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
>  #undef BPF_LINK_TYPE
>  };
>  
> +struct bpf_mem_alloc bpf_global_percpu_ma;
> +static bool bpf_global_percpu_ma_set;
> +
>  /* bpf_check() is a static code analyzer that walks eBPF program
>   * instruction by instruction and updates register/stack state.
>   * All paths of conditional branches are analyzed until 'bpf_exit' insn.
> @@ -336,6 +340,7 @@ struct bpf_kfunc_call_arg_meta {
>  struct btf *btf_vmlinux;
>  
>  static DEFINE_MUTEX(bpf_verifier_lock);
> +static DEFINE_MUTEX(bpf_percpu_ma_lock);
>  
>  static const struct bpf_line_info *
>  find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
> @@ -12091,8 +12096,19 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  				if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
>  					return -ENOMEM;
>  
> -				if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set)
> -					return -ENOMEM;
> +				if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
> +					if (!bpf_global_percpu_ma_set) {
> +						mutex_lock(&bpf_percpu_ma_lock);
> +						if (!bpf_global_percpu_ma_set) {
> +							err = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
> +							if (!err)
> +								bpf_global_percpu_ma_set = true;
> +						}

A dumb question here: do we need a memory barrier to guarantee the memory
ordering between bpf_global_percpu_ma_set and bpf_global_percpu_ma?
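Not part of the patch, just to illustrate the concern: if the lockless
fast-path read of the flag does need to be ordered against the allocator
initialization, one conventional pattern would be an smp_store_release()/
smp_load_acquire() pairing on bpf_global_percpu_ma_set, roughly like the
sketch below (same hunk as above, only the flag accesses changed):

	if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
		/* Acquire pairs with the release below, so a reader that
		 * observes the flag also observes the initialized
		 * bpf_global_percpu_ma.
		 */
		if (!smp_load_acquire(&bpf_global_percpu_ma_set)) {
			mutex_lock(&bpf_percpu_ma_lock);
			if (!bpf_global_percpu_ma_set) {
				err = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
				if (!err)
					/* Publish the flag only after the
					 * allocator is fully initialized.
					 */
					smp_store_release(&bpf_global_percpu_ma_set, true);
			}
			mutex_unlock(&bpf_percpu_ma_lock);
			if (err)
				return err;
		}
	}

mutex_unlock() only orders the stores against code running after the
unlock, so in principle a lockless reader on a weakly ordered architecture
could see the flag set before the allocator's initialization stores.
Whether that matters here depends on when bpf_global_percpu_ma is actually
dereferenced relative to this check.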
> +						mutex_unlock(&bpf_percpu_ma_lock);
> +						if (err)
> +							return err;
> +					}
> +				}
>  
>  				if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
>  					verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
