On Mon, Apr 10, 2023 at 12:07:52PM -0700, Dave Marchevsky wrote:
> All btf_fields in an object are 0-initialized by memset in
> bpf_obj_init. This might not be a valid initial state for some field
> types, in which case kfuncs that use the type will properly initialize
> their input if it's been 0-initialized. Some BPF graph collection types
> and kfuncs do this: bpf_list_{head,node} and bpf_rb_node.
>
> An earlier patch in this series added the bpf_refcount field, for which
> the 0 state indicates that the refcounted object should be free'd.
> bpf_obj_init treats this field specially, setting refcount to 1 instead
> of relying on scattered "refcount is 0? Must have just been initialized,
> let's set to 1" logic in kfuncs.
>
> This patch extends this treatment to list and rbtree field types,
> allowing most scattered initialization logic in kfuncs to be removed.
>
> Note that bpf_{list_head,rb_root} may be inside a BPF map, in which case
> they'll be 0-initialized without passing through the newly-added logic,
> so scattered initialization logic must remain for these collection root
> types.
>
> Signed-off-by: Dave Marchevsky <davemarchevsky@xxxxxx>
> ---
>  include/linux/bpf.h  | 38 ++++++++++++++++++++++++++++++++++----
>  kernel/bpf/helpers.c | 17 +++++++----------
>  2 files changed, 41 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 4fc29f9aeaac..8e69948c4adb 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -355,6 +355,39 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
>  	}
>  }
>
> +static inline void __bpf_obj_init_field(enum btf_field_type type, u32 size, void *addr)
> +{
> +	memset(addr, 0, size);
> +
> +	switch (type) {
> +	case BPF_REFCOUNT:
> +		refcount_set((refcount_t *)addr, 1);
> +		break;
> +	case BPF_RB_NODE:
> +		RB_CLEAR_NODE((struct rb_node *)addr);
> +		break;
> +	case BPF_LIST_HEAD:
> +	case BPF_LIST_NODE:
> +		INIT_LIST_HEAD((struct list_head *)addr);
> +		break;
> +	case BPF_RB_ROOT:
> +		/* RB_ROOT_CACHED 0-inits, no need to do anything after memset */
> +	case BPF_SPIN_LOCK:
> +	case BPF_TIMER:
> +	case BPF_KPTR_UNREF:
> +	case BPF_KPTR_REF:
> +		break;
> +	default:
> +		WARN_ON_ONCE(1);
> +		return;
> +	}
> +}
> +
> +static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
> +{
> +	__bpf_obj_init_field(field->type, field->size, addr);
> +}
> +
>  static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_field_type type)
>  {
>  	if (IS_ERR_OR_NULL(rec))
> @@ -369,10 +402,7 @@ static inline void bpf_obj_init(const struct btf_record *rec, void *obj)
>  	if (IS_ERR_OR_NULL(rec))
>  		return;
>  	for (i = 0; i < rec->cnt; i++)
> -		memset(obj + rec->fields[i].offset, 0, rec->fields[i].size);
> -
> -	if (rec->refcount_off >= 0)
> -		refcount_set((refcount_t *)(obj + rec->refcount_off), 1);
> +		bpf_obj_init_field(&rec->fields[i], obj + rec->fields[i].offset);

This part makes sense.
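To make sure we're reading the new helper the same way, these are the
post-conditions I understand bpf_obj_init() to now guarantee for the
special-cased field types. The function below is purely illustrative
(made-up name, not something to add to the patch); it only uses existing
refcount/rbtree/list helpers:

static void check_obj_init_postconditions(const struct btf_record *rec, void *obj)
{
	int i;

	for (i = 0; i < rec->cnt; i++) {
		void *addr = obj + rec->fields[i].offset;

		switch (rec->fields[i].type) {
		case BPF_REFCOUNT:
			/* 0 means "drop me", so fresh objects must start at 1 */
			WARN_ON_ONCE(refcount_read((refcount_t *)addr) != 1);
			break;
		case BPF_RB_NODE:
			/* RB_CLEAR_NODE() self-links __rb_parent_color, so
			 * RB_EMPTY_NODE() is true without kfunc-side fixups
			 */
			WARN_ON_ONCE(!RB_EMPTY_NODE((struct rb_node *)addr));
			break;
		case BPF_LIST_HEAD:
		case BPF_LIST_NODE:
			/* INIT_LIST_HEAD() self-links next/prev, so ->next is
			 * non-NULL and list_empty() is true, unlike memset(0)
			 */
			WARN_ON_ONCE(!list_empty((struct list_head *)addr));
			break;
		default:
			/* spin_lock, timer, kptrs, rb_root: all-zeroes is fine */
			break;
		}
	}
}

Assuming that's the intended contract, centralizing it here instead of
patching things up in individual kfuncs is a clear win.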
>  }
>
>  /* 'dst' must be a temporary buffer and should not point to memory that is being
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 6adbf99dc27f..1208fd8584c9 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -1931,15 +1931,16 @@ __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta
>  	return (void *)p__refcounted_kptr;
>  }
>
> +#define __init_field_infer_size(field_type, addr)\
> +	__bpf_obj_init_field(field_type, btf_field_type_size(field_type), addr)
> +
>  static int __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head,
>  			  bool tail, struct btf_record *rec, u64 off)
>  {
>  	struct list_head *n = (void *)node, *h = (void *)head;
>
>  	if (unlikely(!h->next))
> -		INIT_LIST_HEAD(h);
> -	if (unlikely(!n->next))
> -		INIT_LIST_HEAD(n);
> +		__init_field_infer_size(BPF_LIST_HEAD, h);

But this part is dubious. What's the value? I think it's cleaner to keep it
open-coded with INIT_LIST_HEAD() instead of hiding it behind the helper.

>  	if (!list_empty(n)) {
>  		/* Only called from BPF prog, no need to migrate_disable */
>  		__bpf_obj_drop_impl(n - off, rec);
> @@ -1976,7 +1977,7 @@ static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tai
>  	struct list_head *n, *h = (void *)head;
>
>  	if (unlikely(!h->next))
> -		INIT_LIST_HEAD(h);
> +		__init_field_infer_size(BPF_LIST_HEAD, h);

Same here.

>  	if (list_empty(h))
>  		return NULL;
>  	n = tail ? h->prev : h->next;
> @@ -1984,6 +1985,8 @@ static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tai
>  	return (struct bpf_list_node *)n;
>  }
>
> +#undef __init_field_infer_size
> +
>  __bpf_kfunc struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head)
>  {
>  	return __bpf_list_del(head, false);
> @@ -2000,9 +2003,6 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
>  	struct rb_root_cached *r = (struct rb_root_cached *)root;
>  	struct rb_node *n = (struct rb_node *)node;
>
> -	if (!n->__rb_parent_color)
> -		RB_CLEAR_NODE(n);
> -
>  	if (RB_EMPTY_NODE(n))
>  		return NULL;
>
> @@ -2022,9 +2022,6 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
>  	bpf_callback_t cb = (bpf_callback_t)less;
>  	bool leftmost = true;
>
> -	if (!n->__rb_parent_color)
> -		RB_CLEAR_NODE(n);
> -
>  	if (!RB_EMPTY_NODE(n)) {
>  		/* Only called from BPF prog, no need to migrate_disable */
>  		__bpf_obj_drop_impl(n - off, rec);
> --
> 2.34.1
>
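To be concrete about what I mean, I'd rather keep __bpf_list_del() reading
like this (a sketch based on your patch, behavior unchanged, just without
the macro indirection):

static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
{
	struct list_head *n, *h = (void *)head;

	/* head may sit inside a BPF map, so it can still arrive 0-initialized
	 * without ever passing through bpf_obj_init(); the lazy init has to
	 * stay, and spelled as INIT_LIST_HEAD() it's obvious what it does.
	 */
	if (unlikely(!h->next))
		INIT_LIST_HEAD(h);
	if (list_empty(h))
		return NULL;
	n = tail ? h->prev : h->next;
	list_del_init(n);
	return (struct bpf_list_node *)n;
}

and the same for the head check in __bpf_list_add(). Since the remaining
lazy init only ever applies to map-resident bpf_list_head/bpf_rb_root,
I don't see what the size-inferring macro buys over the plain call.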