Re: [PATCH bpf-next v3 01/13] bpf: Add support for non-fix-size percpu mem allocation

Heiko Carstens <hca@xxxxxxxxxxxxx> · Wed, 15 Nov 2023 16:31:39 +0100

On Sun, Aug 27, 2023 at 08:27:34AM -0700, Yonghong Song wrote:
> This is needed for later percpu mem allocation when the
> allocation is done by bpf program. For such cases, a global
> bpf_global_percpu_ma is added where a flexible allocation
> size is needed.
> 
> Signed-off-by: Yonghong Song <yonghong.song@xxxxxxxxx>
> ---
>  include/linux/bpf.h   |  4 ++--
>  kernel/bpf/core.c     |  8 +++++---
>  kernel/bpf/memalloc.c | 14 ++++++--------
>  3 files changed, 13 insertions(+), 13 deletions(-)

Both Marc and Mikhail reported out-of-memory conditions on s390 machines,
and bisected it down to this upstream commit 41a5db8d8161 ("bpf: Add
support for non-fix-size percpu mem allocation").
The commit seems to eat up a lot of memory, and the amount depends only on
the number of possible CPUs.

If we have a machine with 8 GB of memory, 6 present CPUs and 512 possible
CPUs (yes, this is a realistic scenario), the memory consumption directly
after boot is:

$ cat /sys/devices/system/cpu/present
0-5
$ cat /sys/devices/system/cpu/possible
0-511

Before this commit:

$ cat /proc/meminfo
MemTotal:        8141924 kB
MemFree:         7639872 kB

With this commit:

$ cat /proc/meminfo
MemTotal:        8141924 kB
MemFree:         4852248 kB

So MemFree drops by 2787624 kB (about 2.7 GiB) directly after boot, which
appears to be a significant regression.
I'm quoting the rest of the original patch below for reference only.

> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 12596af59c00..144dbddf53bd 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -55,8 +55,8 @@ struct cgroup;
>  extern struct idr btf_idr;
>  extern spinlock_t btf_idr_lock;
>  extern struct kobject *btf_kobj;
> -extern struct bpf_mem_alloc bpf_global_ma;
> -extern bool bpf_global_ma_set;
> +extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
> +extern bool bpf_global_ma_set, bpf_global_percpu_ma_set;
>  
>  typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
>  typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 0f8f036d8bd1..95599df82ee4 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -64,8 +64,8 @@
>  #define OFF	insn->off
>  #define IMM	insn->imm
>  
> -struct bpf_mem_alloc bpf_global_ma;
> -bool bpf_global_ma_set;
> +struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
> +bool bpf_global_ma_set, bpf_global_percpu_ma_set;
>  
>  /* No hurry in this branch
>   *
> @@ -2921,7 +2921,9 @@ static int __init bpf_global_ma_init(void)
>  
>  	ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
>  	bpf_global_ma_set = !ret;
> -	return ret;
> +	ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
> +	bpf_global_percpu_ma_set = !ret;
> +	return !bpf_global_ma_set || !bpf_global_percpu_ma_set;
>  }
>  late_initcall(bpf_global_ma_init);
>  #endif
> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
> index 9c49ae53deaf..cb60445de98a 100644
> --- a/kernel/bpf/memalloc.c
> +++ b/kernel/bpf/memalloc.c
> @@ -499,15 +499,16 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
>  	struct obj_cgroup *objcg = NULL;
>  	int cpu, i, unit_size, percpu_size = 0;
>  
> +	/* room for llist_node and per-cpu pointer */
> +	if (percpu)
> +		percpu_size = LLIST_NODE_SZ + sizeof(void *);
> +
>  	if (size) {
>  		pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
>  		if (!pc)
>  			return -ENOMEM;
>  
> -		if (percpu)
> -			/* room for llist_node and per-cpu pointer */
> -			percpu_size = LLIST_NODE_SZ + sizeof(void *);
> -		else
> +		if (!percpu)
>  			size += LLIST_NODE_SZ; /* room for llist_node */
>  		unit_size = size;
>  
> @@ -527,10 +528,6 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
>  		return 0;
>  	}
>  
> -	/* size == 0 && percpu is an invalid combination */
> -	if (WARN_ON_ONCE(percpu))
> -		return -EINVAL;
> -
>  	pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL);
>  	if (!pcc)
>  		return -ENOMEM;
> @@ -543,6 +540,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
>  			c = &cc->cache[i];
>  			c->unit_size = sizes[i];
>  			c->objcg = objcg;
> +			c->percpu_size = percpu_size;
>  			c->tgt = c;
>  			prefill_mem_cache(c, cpu);
>  		}
> -- 
> 2.34.1
>