On Wed, Apr 8, 2020 at 4:26 PM Yonghong Song <yhs@xxxxxx> wrote: > > Only the tasks belonging to "current" pid namespace > are enumerated. > > For task/file target, the bpf program will have access to > struct task_struct *task > u32 fd > struct file *file > where fd/file is an open file for the task. > > Signed-off-by: Yonghong Song <yhs@xxxxxx> > --- > kernel/bpf/Makefile | 2 +- > kernel/bpf/dump_task.c | 294 +++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 295 insertions(+), 1 deletion(-) > create mode 100644 kernel/bpf/dump_task.c > > diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile > index 4a1376ab2bea..7e2c73deabab 100644 > --- a/kernel/bpf/Makefile > +++ b/kernel/bpf/Makefile > @@ -26,7 +26,7 @@ obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o > endif > ifeq ($(CONFIG_SYSFS),y) > obj-$(CONFIG_DEBUG_INFO_BTF) += sysfs_btf.o > -obj-$(CONFIG_BPF_SYSCALL) += dump.o > +obj-$(CONFIG_BPF_SYSCALL) += dump.o dump_task.o > endif > ifeq ($(CONFIG_BPF_JIT),y) > obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o > diff --git a/kernel/bpf/dump_task.c b/kernel/bpf/dump_task.c > new file mode 100644 > index 000000000000..69b0bcec68e9 > --- /dev/null > +++ b/kernel/bpf/dump_task.c > @@ -0,0 +1,294 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* Copyright (c) 2020 Facebook */ > + > +#include <linux/init.h> > +#include <linux/namei.h> > +#include <linux/pid_namespace.h> > +#include <linux/fs.h> > +#include <linux/fdtable.h> > +#include <linux/filter.h> > + > +struct bpfdump_seq_task_info { > + struct pid_namespace *ns; > + struct task_struct *task; > + u32 id; > +}; > + > +static struct task_struct *task_seq_get_next(struct pid_namespace *ns, u32 *id) > +{ > + struct task_struct *task; > + struct pid *pid; > + > + rcu_read_lock(); > + pid = idr_get_next(&ns->idr, id); > + task = get_pid_task(pid, PIDTYPE_PID); > + if (task) > + get_task_struct(task); I think get_pid_task() already calls get_task_struct() internally on success. 
See also the bpf_task_fd_query() implementation; it doesn't take an extra refcount on the task. > + rcu_read_unlock(); > + > + return task; > +} > + [...] > +static struct file *task_file_seq_get_next(struct pid_namespace *ns, u32 *id, > + int *fd, struct task_struct **task, > + struct files_struct **fstruct) > +{ > + struct files_struct *files; > + struct task_struct *tk; > + u32 sid = *id; > + int sfd; > + > + /* If this function returns a non-NULL file object, > + * it held a reference to the files_struct and file. > + * Otherwise, it does not hold any reference. > + */ > +again: > + if (*fstruct) { > + files = *fstruct; > + sfd = *fd; > + } else { > + tk = task_seq_get_next(ns, &sid); > + if (!tk) > + return NULL; > + files = get_files_struct(tk); > + put_task_struct(tk); > + if (!files) > + return NULL; There might still be another task with its own files, so shouldn't we keep iterating tasks here? > + *fstruct = files; > + *task = tk; > + if (sid == *id) { > + sfd = *fd; > + } else { > + *id = sid; > + sfd = 0; > + } > + } > + > + spin_lock(&files->file_lock); > + for (; sfd < files_fdtable(files)->max_fds; sfd++) { > + struct file *f; > + > + f = fcheck_files(files, sfd); > + if (!f) > + continue; > + > + *fd = sfd; > + get_file(f); > + spin_unlock(&files->file_lock); > + return f; > + } > + > + /* the current task is done, go to the next task */ > + spin_unlock(&files->file_lock); > + put_files_struct(files); > + *fstruct = NULL; > + sid = ++(*id); > + goto again; > +} > + [...] > +static int task_file_seq_show(struct seq_file *seq, void *v) > +{ > + struct bpfdump_seq_task_file_info *info = seq->private; > + struct { > + struct task_struct *task; > + u32 fd; > + struct file *file; > + struct seq_file *seq; > + u64 seq_num; Should all the fields here be 8-byte aligned, including pointers (because BPF is a 64-bit architecture)? At least `u32 fd` should be, shouldn't it? 
> + } ctx = { > + .file = v, > + .seq = seq, > + }; > + struct bpf_prog *prog; > + int ret; > + > + prog = bpf_dump_get_prog(seq, sizeof(struct bpfdump_seq_task_file_info), > + &ctx.seq_num); > + ctx.task = info->task; > + ctx.fd = info->fd; > + ret = bpf_dump_run_prog(prog, &ctx); > + > + return ret == 0 ? 0 : -EINVAL; > +} > + > +static const struct seq_operations task_file_seq_ops = { > + .start = task_file_seq_start, > + .next = task_file_seq_next, > + .stop = task_file_seq_stop, > + .show = task_file_seq_show, > +}; > + > +int __init bpfdump__task(struct task_struct *task, struct seq_file *seq, > + u64 seq_num) { > + return 0; > +} > + > +int __init bpfdump__task_file(struct task_struct *task, u32 fd, > + struct file *file, struct seq_file *seq, > + u64 seq_num) > +{ > + return 0; > +} > + > +static int __init task_dump_init(void) > +{ > + int ret; > + > + ret = bpf_dump_reg_target("task", "bpfdump__task", > + &task_seq_ops, > + sizeof(struct bpfdump_seq_task_info), 0); > + if (ret) > + return ret; > + > + return bpf_dump_reg_target("task/file", "bpfdump__task_file", > + &task_file_seq_ops, > + sizeof(struct bpfdump_seq_task_file_info), > + 0); > +} > +late_initcall(task_dump_init); > -- > 2.24.1 >