On Sun, Aug 27, 2023 at 12:21 AM Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx> wrote: > > This Patch adds kfuncs bpf_iter_css_{new,next,destroy} which allow creation > and manipulation of struct bpf_iter_css in open-coded iterator style. These > kfuncs actually wrapps css_next_descendant_{pre, post}. BPF programs can > use these kfuncs through bpf_for_each macro for iteration of all descendant > css under a root css. > > Normally, css_next_descendant_{pre, post} should be called with rcu > locking. Although we have bpf_rcu_read_lock(), here we still calls > rcu_read_lock in bpf_iter_css_new and unlock in bpf_iter_css_destroy > for convenience use. > > Signed-off-by: Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx> > --- > include/uapi/linux/bpf.h | 5 +++++ > kernel/bpf/helpers.c | 3 +++ > kernel/bpf/task_iter.c | 39 ++++++++++++++++++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 5 +++++ > tools/lib/bpf/bpf_helpers.h | 6 ++++++ > 5 files changed, 58 insertions(+) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index cfbd527e3733..19f1f1bf9301 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -7203,4 +7203,9 @@ struct bpf_iter_process { > __u64 __opaque[1]; > } __attribute__((aligned(8))); > > +struct bpf_iter_css { > + __u64 __opaque[2]; > + char __opaque_c[1]; Burning an extra 8 bytes just for a flag seems excessive. Maybe let's add two separate iterators instead, one for descendant_pre and one for descendant_post? The bpf prog code will also be easier to read (no need to guess what the bool flag does). 
> +} __attribute__((aligned(8))); > + > #endif /* _UAPI__LINUX_BPF_H__ */ > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c > index 81a2005edc26..47d46a51855f 100644 > --- a/kernel/bpf/helpers.c > +++ b/kernel/bpf/helpers.c > @@ -2461,6 +2461,9 @@ BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY) > BTF_ID_FLAGS(func, bpf_iter_process_new, KF_ITER_NEW) > BTF_ID_FLAGS(func, bpf_iter_process_next, KF_ITER_NEXT | KF_RET_NULL) > BTF_ID_FLAGS(func, bpf_iter_process_destroy, KF_ITER_DESTROY) > +BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW) > +BTF_ID_FLAGS(func, bpf_iter_css_next, KF_ITER_NEXT | KF_RET_NULL) > +BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY) > BTF_ID_FLAGS(func, bpf_dynptr_adjust) > BTF_ID_FLAGS(func, bpf_dynptr_is_null) > BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) > diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c > index a6717a76c1e0..ef9aef62f1ac 100644 > --- a/kernel/bpf/task_iter.c > +++ b/kernel/bpf/task_iter.c > @@ -893,6 +893,45 @@ __bpf_kfunc void bpf_iter_process_destroy(struct bpf_iter_process *it) > rcu_read_unlock(); > } > > +struct bpf_iter_css_kern { > + struct cgroup_subsys_state *root; > + struct cgroup_subsys_state *pos; > + char flag; > +} __attribute__((aligned(8))); > + > +__bpf_kfunc int bpf_iter_css_new(struct bpf_iter_css *it, > + struct cgroup_subsys_state *root, char flag) > +{ > + struct bpf_iter_css_kern *kit = (void *)it; > + > + BUILD_BUG_ON(sizeof(struct bpf_iter_css_kern) != sizeof(struct bpf_iter_css)); > + BUILD_BUG_ON(__alignof__(struct bpf_iter_css_kern) != __alignof__(struct bpf_iter_css)); > + kit->root = root; > + kit->pos = NULL; > + kit->flag = flag; > + rcu_read_lock(); Same request as in the previous patch: let's make the bpf prog do an explicit bpf_rcu_read_lock() instead of taking rcu_read_lock() inside bpf_iter_css_new().