On Mon, Aug 21, 2023 at 10:34 AM Dave Marchevsky <davemarchevsky@xxxxxx> wrote: > > This patch adds kfuncs bpf_iter_task_vma_{new,next,destroy} which allow > creation and manipulation of struct bpf_iter_task_vma in open-coded > iterator style. BPF programs can use these kfuncs directly or through > bpf_for_each macro for natural-looking iteration of all task vmas. > > The implementation borrows heavily from bpf_find_vma helper's locking - > differing only in that it holds the mmap_read lock for all iterations > while the helper only executes its provided callback on a maximum of 1 > vma. Aside from locking, struct vma_iterator and vma_next do all the > heavy lifting. > > The newly-added struct bpf_iter_task_vma has a name collision with a > selftest for the seq_file task_vma iter's bpf skel, so the selftests/bpf/progs > file is renamed in order to avoid the collision. > > Signed-off-by: Dave Marchevsky <davemarchevsky@xxxxxx> > Cc: Nathan Slingerland <slinger@xxxxxxxx> > --- > include/uapi/linux/bpf.h | 4 + > kernel/bpf/helpers.c | 3 + > kernel/bpf/task_iter.c | 79 +++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 5 ++ > tools/lib/bpf/bpf_helpers.h | 8 ++ > .../selftests/bpf/prog_tests/bpf_iter.c | 26 +++--- > ...f_iter_task_vma.c => bpf_iter_task_vmas.c} | 0 > 7 files changed, 112 insertions(+), 13 deletions(-) > rename tools/testing/selftests/bpf/progs/{bpf_iter_task_vma.c => bpf_iter_task_vmas.c} (100%) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index d21deb46f49f..d90f9bf8080f 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -7291,4 +7291,8 @@ struct bpf_iter_num { > __u64 __opaque[1]; > } __attribute__((aligned(8))); > > +struct bpf_iter_task_vma { > + __u64 __opaque[4]; /* See bpf_iter_num comment above */ > +} __attribute__((aligned(8))); > + > #endif /* _UAPI__LINUX_BPF_H__ */ > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c > index eb91cae0612a..7a06dea749f1 100644 > --- 
a/kernel/bpf/helpers.c > +++ b/kernel/bpf/helpers.c > @@ -2482,6 +2482,9 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) > BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW) > BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL) > BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) > +BTF_ID_FLAGS(func, bpf_iter_task_vma_new, KF_ITER_NEW) > +BTF_ID_FLAGS(func, bpf_iter_task_vma_next, KF_ITER_NEXT | KF_RET_NULL) > +BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY) > BTF_ID_FLAGS(func, bpf_dynptr_adjust) > BTF_ID_FLAGS(func, bpf_dynptr_is_null) > BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) > diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c > index c4ab9d6cdbe9..fb934ca9e020 100644 > --- a/kernel/bpf/task_iter.c > +++ b/kernel/bpf/task_iter.c > @@ -7,7 +7,9 @@ > #include <linux/fs.h> > #include <linux/fdtable.h> > #include <linux/filter.h> > +#include <linux/bpf_mem_alloc.h> > #include <linux/btf_ids.h> > +#include <linux/mm_types.h> > #include "mmap_unlock_work.h" > > static const char * const iter_task_type_names[] = { > @@ -823,6 +825,83 @@ const struct bpf_func_proto bpf_find_vma_proto = { > .arg5_type = ARG_ANYTHING, > }; > > +/* Non-opaque version of uapi bpf_iter_task_vma */ > +struct bpf_iter_task_vma_kern { > + struct task_struct *task; > + struct mm_struct *mm; > + struct mmap_unlock_irq_work *work; > + struct vma_iterator *vmi; > +} __attribute__((aligned(8))); > + > +__bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it, > + struct task_struct *task, u64 addr) > +{ > + struct bpf_iter_task_vma_kern *kit = (void *)it; > + bool irq_work_busy = false; > + int err; > + > + BUILD_BUG_ON(sizeof(struct bpf_iter_task_vma_kern) != sizeof(struct bpf_iter_task_vma)); > + BUILD_BUG_ON(__alignof__(struct bpf_iter_task_vma_kern) != __alignof__(struct bpf_iter_task_vma)); > + > + /* NULL i->mm signals failed bpf_iter_task_vma initialization. > + * i->work == NULL is valid. 
> + */ > + kit->mm = NULL; > + kit->task = NULL; > + if (!task) > + return -ENOENT; > + > + kit->task = get_task_struct(task); > + kit->mm = task->mm; > + if (!kit->mm) { > + err = -ENOENT; > + goto err_put_task; > + } > + > + kit->vmi = bpf_mem_alloc(&bpf_global_ma, sizeof(struct vma_iterator)); > + if (!kit->vmi) { > + err = -ENOMEM; > + goto err_put_task; > + } Since an allocation is done here anyway, let's allocate the whole struct bpf_iter_task_vma_kern and reduce the BPF program side (struct bpf_iter_task_vma) to a single pointer?