On Mon, Aug 9, 2021 at 4:00 PM Daniel Borkmann <daniel@xxxxxxxxxxxxx> wrote:
>
> On 7/30/21 7:34 AM, Andrii Nakryiko wrote:
> > Similar to BPF_PROG_RUN, turn BPF_PROG_RUN_ARRAY macros into proper functions
> > with all the same readability and maintainability benefits. Making them into
> > functions required shuffling around bpf_set_run_ctx/bpf_reset_run_ctx
> > functions. Also, explicitly specifying the type of the BPF prog run callback
> > required adjusting __bpf_prog_run_save_cb() to accept const void *, casted
> > internally to const struct sk_buff.
> >
> > Further, split out a cgroup-specific BPF_PROG_RUN_ARRAY_CG and
> > BPF_PROG_RUN_ARRAY_CG_FLAGS from the more generic BPF_PROG_RUN_ARRAY due to
> > the differences in bpf_run_ctx used for those two different use cases.
> >
> > I think BPF_PROG_RUN_ARRAY_CG would benefit from further refactoring to accept
> > struct cgroup and enum bpf_attach_type instead of bpf_prog_array, fetching
> > cgrp->bpf.effective[type] and RCU-dereferencing it internally. But that
> > required including include/linux/cgroup-defs.h, which I wasn't sure is ok with
> > everyone.
> >
> > The remaining generic BPF_PROG_RUN_ARRAY function will be extended to
> > pass-through user-provided context value in the next patch.
> >
> > Acked-by: Yonghong Song <yhs@xxxxxx>
> > Signed-off-by: Andrii Nakryiko <andrii@xxxxxxxxxx>
> > ---
> >  include/linux/bpf.h      | 187 +++++++++++++++++++++++----------------
> >  include/linux/filter.h   |   5 +-
> >  kernel/bpf/cgroup.c      |  32 +++----
> >  kernel/trace/bpf_trace.c |   2 +-
> >  4 files changed, 132 insertions(+), 94 deletions(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index c8cc09013210..9c44b56b698f 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -1146,67 +1146,124 @@ struct bpf_run_ctx {};
> >
> >  struct bpf_cg_run_ctx {
> >          struct bpf_run_ctx run_ctx;
> > -        struct bpf_prog_array_item *prog_item;
> > +        const struct bpf_prog_array_item *prog_item;
> >  };
> >
> > +#ifdef CONFIG_BPF_SYSCALL
> > +static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
> > +{
> > +        struct bpf_run_ctx *old_ctx;
> > +
> > +        old_ctx = current->bpf_ctx;
> > +        current->bpf_ctx = new_ctx;
> > +        return old_ctx;
> > +}
> > +
> > +static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
> > +{
> > +        current->bpf_ctx = old_ctx;
> > +}
> > +#else /* CONFIG_BPF_SYSCALL */
> > +static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
> > +{
> > +        return NULL;
> > +}
> > +
> > +static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
> > +{
> > +}
> > +#endif /* CONFIG_BPF_SYSCALL */
>
> nit, but either is fine..:
>
> static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
> {
>         struct bpf_run_ctx *old_ctx = NULL;
>
> #ifdef CONFIG_BPF_SYSCALL
>         old_ctx = current->bpf_ctx;
>         current->bpf_ctx = new_ctx;
> #endif
>         return old_ctx;
> }
>
> static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
> {
> #ifdef CONFIG_BPF_SYSCALL
>         current->bpf_ctx = old_ctx;
> #endif
> }

sure, I don't mind

> >  /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
> >  #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)
> >  /* BPF program asks to set CN on the packet. */
> >  #define BPF_RET_SET_CN (1 << 0)
> >
> > -#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \
> > -        ({ \
> > -                struct bpf_prog_array_item *_item; \
> > -                struct bpf_prog *_prog; \
> > -                struct bpf_prog_array *_array; \
> > -                struct bpf_run_ctx *old_run_ctx; \
> > -                struct bpf_cg_run_ctx run_ctx; \
> > -                u32 _ret = 1; \
> > -                u32 func_ret; \
> > -                migrate_disable(); \
> > -                rcu_read_lock(); \
> > -                _array = rcu_dereference(array); \
> > -                _item = &_array->items[0]; \
> > -                old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); \
> > -                while ((_prog = READ_ONCE(_item->prog))) { \
> > -                        run_ctx.prog_item = _item; \
> > -                        func_ret = func(_prog, ctx); \
> > -                        _ret &= (func_ret & 1); \
> > -                        *(ret_flags) |= (func_ret >> 1); \
> > -                        _item++; \
> > -                } \
> > -                bpf_reset_run_ctx(old_run_ctx); \
> > -                rcu_read_unlock(); \
> > -                migrate_enable(); \
> > -                _ret; \
> > -        })
> > -
> > -#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage) \
> > -        ({ \
> > -                struct bpf_prog_array_item *_item; \
> > -                struct bpf_prog *_prog; \
> > -                struct bpf_prog_array *_array; \
> > -                struct bpf_run_ctx *old_run_ctx; \
> > -                struct bpf_cg_run_ctx run_ctx; \
> > -                u32 _ret = 1; \
> > -                migrate_disable(); \
> > -                rcu_read_lock(); \
> > -                _array = rcu_dereference(array); \
> > -                if (unlikely(check_non_null && !_array))\
> > -                        goto _out; \
> > -                _item = &_array->items[0]; \
> > -                old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\
> > -                while ((_prog = READ_ONCE(_item->prog))) { \
> > -                        run_ctx.prog_item = _item; \
> > -                        _ret &= func(_prog, ctx); \
> > -                        _item++; \
> > -                } \
> > -                bpf_reset_run_ctx(old_run_ctx); \
> > -_out: \
> > -                rcu_read_unlock(); \
> > -                migrate_enable(); \
> > -                _ret; \
> > -        })
> > +typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
> > +
> > +static __always_inline u32
> > +BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
> > +                            const void *ctx, bpf_prog_run_fn run_prog,
> > +                            u32 *ret_flags)
> > +{
> > +        const struct bpf_prog_array_item *item;
> > +        const struct bpf_prog *prog;
> > +        const struct bpf_prog_array *array;
> > +        struct bpf_run_ctx *old_run_ctx;
> > +        struct bpf_cg_run_ctx run_ctx;
> > +        u32 ret = 1;
> > +        u32 func_ret;
> > +
> > +        migrate_disable();
> > +        rcu_read_lock();
> > +        array = rcu_dereference(array_rcu);
> > +        item = &array->items[0];
> > +        old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
> > +        while ((prog = READ_ONCE(item->prog))) {
> > +                run_ctx.prog_item = item;
> > +                func_ret = run_prog(prog, ctx);
> > +                ret &= (func_ret & 1);
> > +                *(ret_flags) |= (func_ret >> 1);
> > +                item++;
> > +        }
> > +        bpf_reset_run_ctx(old_run_ctx);
> > +        rcu_read_unlock();
> > +        migrate_enable();
> > +        return ret;
> > +}
> > +
> > +static __always_inline u32
> > +BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
> > +                      const void *ctx, bpf_prog_run_fn run_prog)
> > +{
> > +        const struct bpf_prog_array_item *item;
> > +        const struct bpf_prog *prog;
> > +        const struct bpf_prog_array *array;
> > +        struct bpf_run_ctx *old_run_ctx;
> > +        struct bpf_cg_run_ctx run_ctx;
> > +        u32 ret = 1;
> > +
> > +        migrate_disable();
> > +        rcu_read_lock();
> > +        array = rcu_dereference(array_rcu);
> > +        item = &array->items[0];
> > +        old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
> > +        while ((prog = READ_ONCE(item->prog))) {
> > +                run_ctx.prog_item = item;
> > +                ret &= run_prog(prog, ctx);
> > +                item++;
> > +        }
> > +        bpf_reset_run_ctx(old_run_ctx);
> > +        rcu_read_unlock();
> > +        migrate_enable();
> > +        return ret;
> > +}
> > +
> > +static __always_inline u32
> > +BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
> > +                   const void *ctx, bpf_prog_run_fn run_prog)
> > +{
> > +        const struct bpf_prog_array_item *item;
> > +        const struct bpf_prog *prog;
> > +        const struct bpf_prog_array *array;
> > +        u32 ret = 1;
> > +
> > +        migrate_disable();
> > +        rcu_read_lock();
> > +        array = rcu_dereference(array_rcu);
> > +        if (unlikely(!array))
> > +                goto out;
> > +        item = &array->items[0];
> > +        while ((prog = READ_ONCE(item->prog))) {
> > +                ret &= run_prog(prog, ctx);
> > +                item++;
> > +        }
> > +out:
> > +        rcu_read_unlock();
> > +        migrate_enable();
> > +        return ret;
> > +}
>
> Is there any way we could consolidate the above somewhat further and have things
> optimized out at compilation time, e.g. when const args are null/non-null? :/

Do you mean like passing "bool check_for_null" as an extra argument
and then do

if (check_for_null && unlikely(!array))
        goto out;

?

I feel like that actually makes a bigger mess and just unnecessarily
obfuscates code, while saving just a few lines of straightforward
code. We didn't even do that for BPF_PROG_RUN_ARRAY_FLAGS vs
__BPF_PROG_RUN_ARRAY before, even though there are about 2 lines of
code differences.

But also in one of the next patches I'm adding perf-specific
bpf_perf_run_ctx (different from bpf_cg_run_ctx), so it will make
sharing the logic within these BPF_PROG_RUN_ARRAY* even harder and
more convoluted.

Or were you talking about some other aspect here?

> >  /* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
> >   * so BPF programs can request cwr for TCP packets.
> > @@ -1235,7 +1292,7 @@ _out: \
> >                  u32 _flags = 0; \
>
[...]
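To make the consolidation being discussed above concrete, here is a rough
sketch of what a single helper with compile-time-constant arguments could
look like, built on the functions added by the patch. Only "check_for_null"
is named in the thread; the helper name __bpf_prog_run_array, the
set_cg_run_ctx flag and the "ret_flags == NULL" dispatch are illustrative
assumptions, and the sketch does not address the point that the upcoming
perf-specific bpf_perf_run_ctx would still need separate handling:

/* Hypothetical consolidation sketch, not part of the patch. With
 * __always_inline and literal true/false/NULL arguments at each call
 * site, the compiler can drop the branches that do not apply there.
 */
static __always_inline u32
__bpf_prog_run_array(const struct bpf_prog_array __rcu *array_rcu,
                     const void *ctx, bpf_prog_run_fn run_prog,
                     u32 *ret_flags, bool check_for_null, bool set_cg_run_ctx)
{
        const struct bpf_prog_array_item *item;
        const struct bpf_prog *prog;
        const struct bpf_prog_array *array;
        struct bpf_run_ctx *old_run_ctx = NULL;
        struct bpf_cg_run_ctx run_ctx;
        u32 func_ret, ret = 1;

        migrate_disable();
        rcu_read_lock();
        array = rcu_dereference(array_rcu);
        if (check_for_null && unlikely(!array))
                goto out;
        item = &array->items[0];
        if (set_cg_run_ctx)
                old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
        while ((prog = READ_ONCE(item->prog))) {
                if (set_cg_run_ctx)
                        run_ctx.prog_item = item;
                func_ret = run_prog(prog, ctx);
                if (ret_flags) {
                        /* flags variant: bit 0 is the verdict, upper bits are flags */
                        ret &= (func_ret & 1);
                        *ret_flags |= (func_ret >> 1);
                } else {
                        ret &= func_ret;
                }
                item++;
        }
        if (set_cg_run_ctx)
                bpf_reset_run_ctx(old_run_ctx);
out:
        rcu_read_unlock();
        migrate_enable();
        return ret;
}

/* The entry points from the patch would then reduce to thin wrappers, e.g.: */
static __always_inline u32
BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
                      const void *ctx, bpf_prog_run_fn run_prog)
{
        return __bpf_prog_run_array(array_rcu, ctx, run_prog, NULL,
                                    false, true);
}

With constant arguments the unused branches are discarded at compile time,
which is the "optimized out at compilation time" behavior asked about;
whether that is clearer than three small explicit functions is exactly the
trade-off debated in the replies above.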