On Sun, Aug 27, 2023 at 08:27:34AM -0700, Yonghong Song wrote: > This is needed for later percpu mem allocation when the > allocation is done by bpf program. For such cases, a global > bpf_global_percpu_ma is added where a flexible allocation > size is needed. > > Signed-off-by: Yonghong Song <yonghong.song@xxxxxxxxx> > --- > include/linux/bpf.h | 4 ++-- > kernel/bpf/core.c | 8 +++++--- > kernel/bpf/memalloc.c | 14 ++++++-------- > 3 files changed, 13 insertions(+), 13 deletions(-) Both Marc and Mikhail reported out-of-memory conditions on s390 machines, and bisected it down to this upstream commit 41a5db8d8161 ("bpf: Add support for non-fix-size percpu mem allocation"). This commit appears to consume a large amount of memory based solely on the number of possible CPUs, regardless of how many are actually present. If we have a machine with 8GB, 6 present CPUs and 512 possible CPUs (yes, this is a realistic scenario), the memory consumption directly after boot is: $ cat /sys/devices/system/cpu/present 0-5 $ cat /sys/devices/system/cpu/possible 0-511 Before this commit: $ cat /proc/meminfo MemTotal: 8141924 kB MemFree: 7639872 kB With this commit: $ cat /proc/meminfo MemTotal: 8141924 kB MemFree: 4852248 kB So, this appears to be a significant regression. I'm quoting the rest of the original patch below for reference only.
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 12596af59c00..144dbddf53bd 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -55,8 +55,8 @@ struct cgroup; > extern struct idr btf_idr; > extern spinlock_t btf_idr_lock; > extern struct kobject *btf_kobj; > -extern struct bpf_mem_alloc bpf_global_ma; > -extern bool bpf_global_ma_set; > +extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; > +extern bool bpf_global_ma_set, bpf_global_percpu_ma_set; > > typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); > typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c > index 0f8f036d8bd1..95599df82ee4 100644 > --- a/kernel/bpf/core.c > +++ b/kernel/bpf/core.c > @@ -64,8 +64,8 @@ > #define OFF insn->off > #define IMM insn->imm > > -struct bpf_mem_alloc bpf_global_ma; > -bool bpf_global_ma_set; > +struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; > +bool bpf_global_ma_set, bpf_global_percpu_ma_set; > > /* No hurry in this branch > * > @@ -2921,7 +2921,9 @@ static int __init bpf_global_ma_init(void) > > ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false); > bpf_global_ma_set = !ret; > - return ret; > + ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true); > + bpf_global_percpu_ma_set = !ret; > + return !bpf_global_ma_set || !bpf_global_percpu_ma_set; > } > late_initcall(bpf_global_ma_init); > #endif > diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c > index 9c49ae53deaf..cb60445de98a 100644 > --- a/kernel/bpf/memalloc.c > +++ b/kernel/bpf/memalloc.c > @@ -499,15 +499,16 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) > struct obj_cgroup *objcg = NULL; > int cpu, i, unit_size, percpu_size = 0; > > + /* room for llist_node and per-cpu pointer */ > + if (percpu) > + percpu_size = LLIST_NODE_SZ + sizeof(void *); > + > if (size) { > pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL); > if (!pc) > return -ENOMEM; > > - if (percpu) 
> - /* room for llist_node and per-cpu pointer */ > - percpu_size = LLIST_NODE_SZ + sizeof(void *); > - else > + if (!percpu) > size += LLIST_NODE_SZ; /* room for llist_node */ > unit_size = size; > > @@ -527,10 +528,6 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) > return 0; > } > > - /* size == 0 && percpu is an invalid combination */ > - if (WARN_ON_ONCE(percpu)) > - return -EINVAL; > - > pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL); > if (!pcc) > return -ENOMEM; > @@ -543,6 +540,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) > c = &cc->cache[i]; > c->unit_size = sizes[i]; > c->objcg = objcg; > + c->percpu_size = percpu_size; > c->tgt = c; > prefill_mem_cache(c, cpu); > } > -- > 2.34.1 >