On Mon, Apr 10, 2023 at 12:07:52PM -0700, Dave Marchevsky wrote:
> All btf_fields in an object are 0-initialized by memset in
> bpf_obj_init. This might not be a valid initial state for some field
> types, in which case kfuncs that use the type will properly initialize
> their input if it's been 0-initialized. Some BPF graph collection types
> and kfuncs do this: bpf_list_{head,node} and bpf_rb_node.
>
> An earlier patch in this series added the bpf_refcount field, for which
> the 0 state indicates that the refcounted object should be free'd.
> bpf_obj_init treats this field specially, setting refcount to 1 instead
> of relying on scattered "refcount is 0? Must have just been initialized,
> let's set to 1" logic in kfuncs.
>
> This patch extends this treatment to list and rbtree field types,
> allowing most scattered initialization logic in kfuncs to be removed.
>
> Note that bpf_{list_head,rb_root} may be inside a BPF map, in which case
> they'll be 0-initialized without passing through the newly-added logic,
> so scattered initialization logic must remain for these collection root
> types.
>
> Signed-off-by: Dave Marchevsky <davemarchevsky@xxxxxx>
> ---
>  include/linux/bpf.h  | 38 ++++++++++++++++++++++++++++++++++----
>  kernel/bpf/helpers.c | 17 +++++++----------
>  2 files changed, 41 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 4fc29f9aeaac..8e69948c4adb 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -355,6 +355,39 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
>  	}
>  }
>
> +static inline void __bpf_obj_init_field(enum btf_field_type type, u32 size, void *addr)
> +{
> +	memset(addr, 0, size);
> +
> +	switch (type) {
> +	case BPF_REFCOUNT:
> +		refcount_set((refcount_t *)addr, 1);
> +		break;
> +	case BPF_RB_NODE:
> +		RB_CLEAR_NODE((struct rb_node *)addr);
> +		break;
> +	case BPF_LIST_HEAD:
> +	case BPF_LIST_NODE:
> +		INIT_LIST_HEAD((struct list_head *)addr);
> +		break;
> +	case BPF_RB_ROOT:
> +		/* RB_ROOT_CACHED 0-inits, no need to do anything after memset */
> +	case BPF_SPIN_LOCK:
> +	case BPF_TIMER:
> +	case BPF_KPTR_UNREF:
> +	case BPF_KPTR_REF:
> +		break;
> +	default:
> +		WARN_ON_ONCE(1);
> +		return;
> +	}
> +}
> +
> +static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
> +{
> +	__bpf_obj_init_field(field->type, field->size, addr);
> +}
> +
>  static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_field_type type)
>  {
>  	if (IS_ERR_OR_NULL(rec))
> @@ -369,10 +402,7 @@ static inline void bpf_obj_init(const struct btf_record *rec, void *obj)
>  	if (IS_ERR_OR_NULL(rec))
>  		return;
>  	for (i = 0; i < rec->cnt; i++)
> -		memset(obj + rec->fields[i].offset, 0, rec->fields[i].size);
> -
> -	if (rec->refcount_off >= 0)
> -		refcount_set((refcount_t *)(obj + rec->refcount_off), 1);
> +		bpf_obj_init_field(&rec->fields[i], obj + rec->fields[i].offset);

This part makes sense.
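To make sure we're reading the new helper the same way, these are the
post-conditions I understand bpf_obj_init() to now guarantee for the
special-cased field types. The function below is purely illustrative
(made-up name, not something to add to the patch); it only uses existing
refcount/rbtree/list helpers:

static void check_obj_init_postconditions(const struct btf_record *rec, void *obj)
{
	int i;

	for (i = 0; i < rec->cnt; i++) {
		void *addr = obj + rec->fields[i].offset;

		switch (rec->fields[i].type) {
		case BPF_REFCOUNT:
			/* 0 means "drop me", so fresh objects must start at 1 */
			WARN_ON_ONCE(refcount_read((refcount_t *)addr) != 1);
			break;
		case BPF_RB_NODE:
			/* RB_CLEAR_NODE() self-links __rb_parent_color, so
			 * RB_EMPTY_NODE() is true without kfunc-side fixups
			 */
			WARN_ON_ONCE(!RB_EMPTY_NODE((struct rb_node *)addr));
			break;
		case BPF_LIST_HEAD:
		case BPF_LIST_NODE:
			/* INIT_LIST_HEAD() self-links next/prev, so ->next is
			 * non-NULL and list_empty() is true, unlike memset(0)
			 */
			WARN_ON_ONCE(!list_empty((struct list_head *)addr));
			break;
		default:
			/* spin_lock, timer, kptrs, rb_root: all-zeroes is fine */
			break;
		}
	}
}

Assuming that's the intended contract, centralizing it here instead of
patching things up in individual kfuncs is a clear win.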
>  }
>
>  /* 'dst' must be a temporary buffer and should not point to memory that is being
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 6adbf99dc27f..1208fd8584c9 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -1931,15 +1931,16 @@ __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta
>  	return (void *)p__refcounted_kptr;
>  }
>
> +#define __init_field_infer_size(field_type, addr)\
> +	__bpf_obj_init_field(field_type, btf_field_type_size(field_type), addr)
> +
>  static int __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head,
>  			  bool tail, struct btf_record *rec, u64 off)
>  {
>  	struct list_head *n = (void *)node, *h = (void *)head;
>
>  	if (unlikely(!h->next))
> -		INIT_LIST_HEAD(h);
> -	if (unlikely(!n->next))
> -		INIT_LIST_HEAD(n);
> +		__init_field_infer_size(BPF_LIST_HEAD, h);

But this part is dubious. What's the value? I think it's cleaner to keep it
open-coded with INIT_LIST_HEAD() instead of hiding it behind the helper.

>  	if (!list_empty(n)) {
>  		/* Only called from BPF prog, no need to migrate_disable */
>  		__bpf_obj_drop_impl(n - off, rec);
> @@ -1976,7 +1977,7 @@ static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tai
>  	struct list_head *n, *h = (void *)head;
>
>  	if (unlikely(!h->next))
> -		INIT_LIST_HEAD(h);
> +		__init_field_infer_size(BPF_LIST_HEAD, h);

Same here.

>  	if (list_empty(h))
>  		return NULL;
>  	n = tail ? h->prev : h->next;
> @@ -1984,6 +1985,8 @@ static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tai
>  	return (struct bpf_list_node *)n;
>  }
>
> +#undef __init_field_infer_size
> +
>  __bpf_kfunc struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head)
>  {
>  	return __bpf_list_del(head, false);
> @@ -2000,9 +2003,6 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
>  	struct rb_root_cached *r = (struct rb_root_cached *)root;
>  	struct rb_node *n = (struct rb_node *)node;
>
> -	if (!n->__rb_parent_color)
> -		RB_CLEAR_NODE(n);
> -
>  	if (RB_EMPTY_NODE(n))
>  		return NULL;
>
> @@ -2022,9 +2022,6 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
>  	bpf_callback_t cb = (bpf_callback_t)less;
>  	bool leftmost = true;
>
> -	if (!n->__rb_parent_color)
> -		RB_CLEAR_NODE(n);
> -
>  	if (!RB_EMPTY_NODE(n)) {
>  		/* Only called from BPF prog, no need to migrate_disable */
>  		__bpf_obj_drop_impl(n - off, rec);
> --
> 2.34.1
>
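To be concrete about what I mean, I'd rather keep __bpf_list_del() reading
like this (a sketch based on your patch, behavior unchanged, just without
the macro indirection):

static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
{
	struct list_head *n, *h = (void *)head;

	/* head may sit inside a BPF map, so it can still arrive 0-initialized
	 * without ever passing through bpf_obj_init(); the lazy init has to
	 * stay, and spelled as INIT_LIST_HEAD() it's obvious what it does.
	 */
	if (unlikely(!h->next))
		INIT_LIST_HEAD(h);
	if (list_empty(h))
		return NULL;
	n = tail ? h->prev : h->next;
	list_del_init(n);
	return (struct bpf_list_node *)n;
}

and the same for the head check in __bpf_list_add(). Since the remaining
lazy init only ever applies to map-resident bpf_list_head/bpf_rb_root,
I don't see what the size-inferring macro buys over the plain call.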