Re: [PATCH bpf-next v3 3/7] bpf: Introduce task open coded iterator kfuncs

Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx> · Wed, 27 Sep 2023 16:20:43 -0700

On Mon, Sep 25, 2023 at 3:56 AM Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx> wrote:
>
> This patch adds kfuncs bpf_iter_task_{new,next,destroy} which allow
> creation and manipulation of struct bpf_iter_task in open-coded iterator
> style. BPF programs can use these kfuncs or through bpf_for_each macro to
> iterate all processes in the system.
>
> The API design keep consistent with SEC("iter/task"). bpf_iter_task_new()
> accepts a specific task and iterating type which allows:
> 1. iterating all process in the system
>
> 2. iterating all threads in the system
>
> 3. iterating all threads of a specific task
> Here we also resuse enum bpf_iter_task_type and rename BPF_TASK_ITER_TID
> to BPF_TASK_ITER_THREAD, rename BPF_TASK_ITER_TGID to BPF_TASK_ITER_PROC.
>
> The newly-added struct bpf_iter_task has a name collision with a selftest
> for the seq_file task iter's bpf skel, so the selftests/bpf/progs file is
> renamed in order to avoid the collision.
>
> Signed-off-by: Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx>
> ---
>  include/linux/bpf.h                           |  8 +-
>  kernel/bpf/helpers.c                          |  3 +
>  kernel/bpf/task_iter.c                        | 96 ++++++++++++++++---
>  .../testing/selftests/bpf/bpf_experimental.h  |  5 +
>  .../selftests/bpf/prog_tests/bpf_iter.c       | 18 ++--
>  .../{bpf_iter_task.c => bpf_iter_tasks.c}     |  0
>  6 files changed, 106 insertions(+), 24 deletions(-)
>  rename tools/testing/selftests/bpf/progs/{bpf_iter_task.c => bpf_iter_tasks.c} (100%)
>

[...]

> @@ -692,9 +692,9 @@ static int bpf_iter_fill_link_info(const struct bpf_iter_aux_info *aux, struct b
>  static void bpf_iter_task_show_fdinfo(const struct bpf_iter_aux_info *aux, struct seq_file *seq)
>  {
>         seq_printf(seq, "task_type:\t%s\n", iter_task_type_names[aux->task.type]);
> -       if (aux->task.type == BPF_TASK_ITER_TID)
> +       if (aux->task.type == BPF_TASK_ITER_THREAD)
>                 seq_printf(seq, "tid:\t%u\n", aux->task.pid);
> -       else if (aux->task.type == BPF_TASK_ITER_TGID)
> +       else if (aux->task.type == BPF_TASK_ITER_PROC)
>                 seq_printf(seq, "pid:\t%u\n", aux->task.pid);
>  }
>
> @@ -856,6 +856,80 @@ __bpf_kfunc void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it)
>         bpf_mem_free(&bpf_global_ma, kit->css_it);
>  }
>
> +struct bpf_iter_task {
> +       __u64 __opaque[2];
> +       __u32 __opaque_int[1];

this should be __u64 __opaque[3], because struct takes full 24 bytes

> +} __attribute__((aligned(8)));
> +
> +struct bpf_iter_task_kern {
> +       struct task_struct *task;
> +       struct task_struct *pos;
> +       unsigned int type;
> +} __attribute__((aligned(8)));
> +
> +__bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it, struct task_struct *task, unsigned int type)

nit: type -> flags, so we can add a bit more stuff, if necessary

> +{
> +       struct bpf_iter_task_kern *kit = (void *)it;

empty line after variable declarations

> +       BUILD_BUG_ON(sizeof(struct bpf_iter_task_kern) != sizeof(struct bpf_iter_task));
> +       BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) !=
> +                                       __alignof__(struct bpf_iter_task));

and I'd add empty line here to keep BUILD_BUG_ON block separate

> +       kit->task = kit->pos = NULL;
> +       switch (type) {
> +       case BPF_TASK_ITER_ALL:
> +       case BPF_TASK_ITER_PROC:
> +       case BPF_TASK_ITER_THREAD:
> +               break;
> +       default:
> +               return -EINVAL;
> +       }
> +
> +       if (type == BPF_TASK_ITER_THREAD)
> +               kit->task = task;
> +       else
> +               kit->task = &init_task;
> +       kit->pos = kit->task;
> +       kit->type = type;
> +       return 0;
> +}
> +
> +__bpf_kfunc struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it)
> +{
> +       struct bpf_iter_task_kern *kit = (void *)it;
> +       struct task_struct *pos;
> +       unsigned int type;
> +
> +       type = kit->type;
> +       pos = kit->pos;
> +
> +       if (!pos)
> +               goto out;
> +
> +       if (type == BPF_TASK_ITER_PROC)
> +               goto get_next_task;
> +
> +       kit->pos = next_thread(kit->pos);
> +       if (kit->pos == kit->task) {
> +               if (type == BPF_TASK_ITER_THREAD) {
> +                       kit->pos = NULL;
> +                       goto out;
> +               }
> +       } else
> +               goto out;
> +
> +get_next_task:
> +       kit->pos = next_task(kit->pos);
> +       kit->task = kit->pos;
> +       if (kit->pos == &init_task)
> +               kit->pos = NULL;

I can't say I completely follow the logic (e.g., for
BPF_TASK_ITER_PROC, why do we do next_task() on first next() call)?
Can you elabore the expected behavior for various combinations of
types and starting task argument?

> +
> +out:
> +       return pos;
> +}
> +
> +__bpf_kfunc void bpf_iter_task_destroy(struct bpf_iter_task *it)
> +{
> +}
> +
>  DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);
>
>  static void do_mmap_read_unlock(struct irq_work *entry)
> diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
> index d3ea90f0e142..d989775dbdb5 100644
> --- a/tools/testing/selftests/bpf/bpf_experimental.h
> +++ b/tools/testing/selftests/bpf/bpf_experimental.h
> @@ -169,4 +169,9 @@ extern int bpf_iter_css_task_new(struct bpf_iter_css_task *it,
>  extern struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) __weak __ksym;
>  extern void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) __weak __ksym;
>
> +struct bpf_iter_task;
> +extern int bpf_iter_task_new(struct bpf_iter_task *it, struct task_struct *task, unsigned int type) __weak __ksym;
> +extern struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) __weak __ksym;
> +extern void bpf_iter_task_destroy(struct bpf_iter_task *it) __weak __ksym;
> +
>  #endif
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c

please split out selftests changes from kernel-side changes. We only
combine them if kernel changes break selftests, preventing bisection.

[...]