Re: [PATCH bpf-next v3 01/13] bpf: Add support for non-fix-size percpu mem allocation

Hou Tao <houtao@xxxxxxxxxxxxxxx> · Wed, 13 Sep 2023 22:10:54 +0800

Hi,

On 8/27/2023 11:27 PM, Yonghong Song wrote:
> This is needed for later percpu memory allocation when the
> allocation is done by a bpf program. For such cases, a global
> allocator, bpf_global_percpu_ma, is added to support flexible
> allocation sizes.
>
> Signed-off-by: Yonghong Song <yonghong.song@xxxxxxxxx>
> ---
>  include/linux/bpf.h   |  4 ++--
>  kernel/bpf/core.c     |  8 +++++---
>  kernel/bpf/memalloc.c | 14 ++++++--------
>  3 files changed, 13 insertions(+), 13 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 12596af59c00..144dbddf53bd 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -55,8 +55,8 @@ struct cgroup;
>  extern struct idr btf_idr;
>  extern spinlock_t btf_idr_lock;
>  extern struct kobject *btf_kobj;
> -extern struct bpf_mem_alloc bpf_global_ma;
> -extern bool bpf_global_ma_set;
> +extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
> +extern bool bpf_global_ma_set, bpf_global_percpu_ma_set;
>  
>  typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
>  typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 0f8f036d8bd1..95599df82ee4 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -64,8 +64,8 @@
>  #define OFF	insn->off
>  #define IMM	insn->imm
>  
> -struct bpf_mem_alloc bpf_global_ma;
> -bool bpf_global_ma_set;
> +struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
> +bool bpf_global_ma_set, bpf_global_percpu_ma_set;
>  
>  /* No hurry in this branch
>   *
> @@ -2921,7 +2921,9 @@ static int __init bpf_global_ma_init(void)
>  
>  	ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
>  	bpf_global_ma_set = !ret;
> -	return ret;
> +	ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
> +	bpf_global_percpu_ma_set = !ret;
> +	return !bpf_global_ma_set || !bpf_global_percpu_ma_set;
>  }
>  late_initcall(bpf_global_ma_init);
>  #endif
> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
> index 9c49ae53deaf..cb60445de98a 100644
> --- a/kernel/bpf/memalloc.c
> +++ b/kernel/bpf/memalloc.c
> @@ -499,15 +499,16 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
>  	struct obj_cgroup *objcg = NULL;
>  	int cpu, i, unit_size, percpu_size = 0;
>  
> +	/* room for llist_node and per-cpu pointer */
> +	if (percpu)
> +		percpu_size = LLIST_NODE_SZ + sizeof(void *);
> +
>  	if (size) {
>  		pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
>  		if (!pc)
>  			return -ENOMEM;
>  
> -		if (percpu)
> -			/* room for llist_node and per-cpu pointer */
> -			percpu_size = LLIST_NODE_SZ + sizeof(void *);
> -		else
> +		if (!percpu)
>  			size += LLIST_NODE_SZ; /* room for llist_node */
>  		unit_size = size;
>  
> @@ -527,10 +528,6 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
>  		return 0;
>  	}
>  
> -	/* size == 0 && percpu is an invalid combination */
> -	if (WARN_ON_ONCE(percpu))
> -		return -EINVAL;
> -
>  	pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL);
>  	if (!pcc)
>  		return -ENOMEM;
> @@ -543,6 +540,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
>  			c = &cc->cache[i];
>  			c->unit_size = sizes[i];
>  			c->objcg = objcg;
> +			c->percpu_size = percpu_size;
>  			c->tgt = c;
>  			prefill_mem_cache(c, cpu);
>  		}

This change would trigger the WARN_ON_ONCE in free_bulk(). For a per-cpu
pointer returned by the bpf memory allocator, ksize() in bpf_mem_free()
always returns 16 (the size of the small header that holds the llist_node
and the per-cpu pointer), so it will not match the unit_size of the source
bpf_mem_cache. I have not found a good solution for the problem yet, other
than disabling the warning for the per-cpu bpf memory allocator, because
the per-cpu allocator currently has no API that returns the size of an
allocated pointer. Could we make the verifier remember the size of the
allocated object and pass it to the bpf memory allocator?