On Mon, Sep 25, 2023 at 3:56 AM Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx> wrote: > > This patch adds kfuncs bpf_iter_task_{new,next,destroy} which allow > creation and manipulation of struct bpf_iter_task in open-coded iterator > style. BPF programs can use these kfuncs or through bpf_for_each macro to > iterate all processes in the system. > > The API design keep consistent with SEC("iter/task"). bpf_iter_task_new() > accepts a specific task and iterating type which allows: > 1. iterating all process in the system > > 2. iterating all threads in the system > > 3. iterating all threads of a specific task > Here we also resuse enum bpf_iter_task_type and rename BPF_TASK_ITER_TID > to BPF_TASK_ITER_THREAD, rename BPF_TASK_ITER_TGID to BPF_TASK_ITER_PROC. > > The newly-added struct bpf_iter_task has a name collision with a selftest > for the seq_file task iter's bpf skel, so the selftests/bpf/progs file is > renamed in order to avoid the collision. > > Signed-off-by: Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx> > --- > include/linux/bpf.h | 8 +- > kernel/bpf/helpers.c | 3 + > kernel/bpf/task_iter.c | 96 ++++++++++++++++--- > .../testing/selftests/bpf/bpf_experimental.h | 5 + > .../selftests/bpf/prog_tests/bpf_iter.c | 18 ++-- > .../{bpf_iter_task.c => bpf_iter_tasks.c} | 0 > 6 files changed, 106 insertions(+), 24 deletions(-) > rename tools/testing/selftests/bpf/progs/{bpf_iter_task.c => bpf_iter_tasks.c} (100%) > [...] > @@ -692,9 +692,9 @@ static int bpf_iter_fill_link_info(const struct bpf_iter_aux_info *aux, struct b > static void bpf_iter_task_show_fdinfo(const struct bpf_iter_aux_info *aux, struct seq_file *seq) > { > seq_printf(seq, "task_type:\t%s\n", iter_task_type_names[aux->task.type]); > - if (aux->task.type == BPF_TASK_ITER_TID) > + if (aux->task.type == BPF_TASK_ITER_THREAD) > seq_printf(seq, "tid:\t%u\n", aux->task.pid); > - else if (aux->task.type == BPF_TASK_ITER_TGID) > + else if (aux->task.type == BPF_TASK_ITER_PROC) > seq_printf(seq, "pid:\t%u\n", aux->task.pid); > } > > @@ -856,6 +856,80 @@ __bpf_kfunc void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) > bpf_mem_free(&bpf_global_ma, kit->css_it); > } > > +struct bpf_iter_task { > + __u64 __opaque[2]; > + __u32 __opaque_int[1]; this should be __u64 __opaque[3], because struct takes full 24 bytes > +} __attribute__((aligned(8))); > + > +struct bpf_iter_task_kern { > + struct task_struct *task; > + struct task_struct *pos; > + unsigned int type; > +} __attribute__((aligned(8))); > + > +__bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it, struct task_struct *task, unsigned int type) nit: type -> flags, so we can add a bit more stuff, if necessary > +{ > + struct bpf_iter_task_kern *kit = (void *)it; empty line after variable declarations > + BUILD_BUG_ON(sizeof(struct bpf_iter_task_kern) != sizeof(struct bpf_iter_task)); > + BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) != > + __alignof__(struct bpf_iter_task)); and I'd add empty line here to keep BUILD_BUG_ON block separate > + kit->task = kit->pos = NULL; > + switch (type) { > + case BPF_TASK_ITER_ALL: > + case BPF_TASK_ITER_PROC: > + case BPF_TASK_ITER_THREAD: > + break; > + default: > + return -EINVAL; > + } > + > + if (type == BPF_TASK_ITER_THREAD) > + kit->task = task; > + else > + kit->task = &init_task; > + kit->pos = kit->task; > + kit->type = type; > + return 0; > +} > + > +__bpf_kfunc struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) > +{ > + struct bpf_iter_task_kern *kit = (void *)it; > + struct task_struct *pos; > + unsigned int type; > + > + type = kit->type; > + pos = kit->pos; > + > + if (!pos) > + goto out; > + > + if (type == BPF_TASK_ITER_PROC) > + goto get_next_task; > + > + kit->pos = next_thread(kit->pos); > + if (kit->pos == kit->task) { > + if (type == BPF_TASK_ITER_THREAD) { > + kit->pos = NULL; > + goto out; > + } > + } else > + goto out; > + > +get_next_task: > + kit->pos = next_task(kit->pos); > + kit->task = kit->pos; > + if (kit->pos == &init_task) > + kit->pos = NULL; I can't say I completely follow the logic (e.g., for BPF_TASK_ITER_PROC, why do we do next_task() on first next() call)? Can you elabore the expected behavior for various combinations of types and starting task argument? > + > +out: > + return pos; > +} > + > +__bpf_kfunc void bpf_iter_task_destroy(struct bpf_iter_task *it) > +{ > +} > + > DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work); > > static void do_mmap_read_unlock(struct irq_work *entry) > diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h > index d3ea90f0e142..d989775dbdb5 100644 > --- a/tools/testing/selftests/bpf/bpf_experimental.h > +++ b/tools/testing/selftests/bpf/bpf_experimental.h > @@ -169,4 +169,9 @@ extern int bpf_iter_css_task_new(struct bpf_iter_css_task *it, > extern struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) __weak __ksym; > extern void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) __weak __ksym; > > +struct bpf_iter_task; > +extern int bpf_iter_task_new(struct bpf_iter_task *it, struct task_struct *task, unsigned int type) __weak __ksym; > +extern struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) __weak __ksym; > +extern void bpf_iter_task_destroy(struct bpf_iter_task *it) __weak __ksym; > + > #endif > diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c please split out selftests changes from kernel-side changes. We only combine them if kernel changes break selftests, preventing bisection. [...]