On Mon, Sep 25, 2023 at 3:56 AM Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx> wrote: > > This Patch adds kfuncs bpf_iter_css_{new,next,destroy} which allow > creation and manipulation of struct bpf_iter_css in open-coded iterator > style. These kfuncs actually wrapps css_next_descendant_{pre, post}. > css_iter can be used to: > > 1) iterating a sepcific cgroup tree with pre/post/up order > > 2) iterating cgroup_subsystem in BPF Prog, like > for_each_mem_cgroup_tree/cpuset_for_each_descendant_pre in kernel. > > The API design is consistent with cgroup_iter. bpf_iter_css_new accepts > parameters defining iteration order and starting css. Here we also reuse > BPF_CGROUP_ITER_DESCENDANTS_PRE, BPF_CGROUP_ITER_DESCENDANTS_POST, > BPF_CGROUP_ITER_ANCESTORS_UP enums. > > Signed-off-by: Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx> > --- > kernel/bpf/cgroup_iter.c | 57 +++++++++++++++++++ > kernel/bpf/helpers.c | 3 + > .../testing/selftests/bpf/bpf_experimental.h | 6 ++ > 3 files changed, 66 insertions(+) > > diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c > index 810378f04fbc..ebc3d9471f52 100644 > --- a/kernel/bpf/cgroup_iter.c > +++ b/kernel/bpf/cgroup_iter.c > @@ -294,3 +294,60 @@ static int __init bpf_cgroup_iter_init(void) > } > > late_initcall(bpf_cgroup_iter_init); > + > +struct bpf_iter_css { > + __u64 __opaque[2]; > + __u32 __opaque_int[1]; > +} __attribute__((aligned(8))); > + same as before, __opaque[3] only > +struct bpf_iter_css_kern { > + struct cgroup_subsys_state *start; > + struct cgroup_subsys_state *pos; > + int order; > +} __attribute__((aligned(8))); > + > +__bpf_kfunc int bpf_iter_css_new(struct bpf_iter_css *it, > + struct cgroup_subsys_state *start, enum bpf_cgroup_iter_order order) Similarly, I wonder if we should go for a more generic "flags" argument? > +{ > + struct bpf_iter_css_kern *kit = (void *)it; empty line > + kit->start = NULL; > + BUILD_BUG_ON(sizeof(struct bpf_iter_css_kern) != sizeof(struct bpf_iter_css)); > + BUILD_BUG_ON(__alignof__(struct bpf_iter_css_kern) != __alignof__(struct bpf_iter_css)); please move this up before kit->start assignment, and separate by empty lines > + switch (order) { > + case BPF_CGROUP_ITER_DESCENDANTS_PRE: > + case BPF_CGROUP_ITER_DESCENDANTS_POST: > + case BPF_CGROUP_ITER_ANCESTORS_UP: > + break; > + default: > + return -EINVAL; > + } > + > + kit->start = start; > + kit->pos = NULL; > + kit->order = order; > + return 0; > +} > + > +__bpf_kfunc struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it) > +{ > + struct bpf_iter_css_kern *kit = (void *)it; empty line > + if (!kit->start) > + return NULL; > + > + switch (kit->order) { > + case BPF_CGROUP_ITER_DESCENDANTS_PRE: > + kit->pos = css_next_descendant_pre(kit->pos, kit->start); > + break; > + case BPF_CGROUP_ITER_DESCENDANTS_POST: > + kit->pos = css_next_descendant_post(kit->pos, kit->start); > + break; > + default: we know it's BPF_CGROUP_ITER_ANCESTORS_UP, so why not have that here explicitly? > + kit->pos = kit->pos ? kit->pos->parent : kit->start; > + } > + > + return kit->pos; wouldn't this implementation never return the "start" css? is that intentional? > +} > + > +__bpf_kfunc void bpf_iter_css_destroy(struct bpf_iter_css *it) > +{ > +} > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c > index 556262c27a75..9c3af36249a2 100644 > --- a/kernel/bpf/helpers.c > +++ b/kernel/bpf/helpers.c > @@ -2510,6 +2510,9 @@ BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY) > BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS) > BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL) > BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY) > +BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW | KF_TRUSTED_ARGS) > +BTF_ID_FLAGS(func, bpf_iter_css_next, KF_ITER_NEXT | KF_RET_NULL) > +BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY) > BTF_ID_FLAGS(func, bpf_dynptr_adjust) > BTF_ID_FLAGS(func, bpf_dynptr_is_null) > BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) > diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h > index d989775dbdb5..aa247d1d81d1 100644 > --- a/tools/testing/selftests/bpf/bpf_experimental.h > +++ b/tools/testing/selftests/bpf/bpf_experimental.h > @@ -174,4 +174,10 @@ extern int bpf_iter_task_new(struct bpf_iter_task *it, struct task_struct *task, > extern struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) __weak __ksym; > extern void bpf_iter_task_destroy(struct bpf_iter_task *it) __weak __ksym; > > +struct bpf_iter_css; > +extern int bpf_iter_css_new(struct bpf_iter_css *it, > + struct cgroup_subsys_state *start, enum bpf_cgroup_iter_order order) __weak __ksym; > +extern struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it) __weak __ksym; > +extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym; > + > #endif > -- > 2.20.1 >