On 8/30/22 7:37 PM, Kui-Feng Lee wrote:
Allow creating an iterator that loops through resources of one
thread/process.
People could only create iterators to loop through all resources of
files, vma, and tasks in the system, even though they were interested
in only the resources of a specific task or process. Passing the
additional parameters, people can now create an iterator to go
through all resources or only the resources of a task.
Signed-off-by: Kui-Feng Lee <kuifeng@xxxxxx>
Acked-by: Yonghong Song <yhs@xxxxxx>
---
include/linux/bpf.h | 25 +++++
include/uapi/linux/bpf.h | 6 ++
kernel/bpf/task_iter.c | 191 +++++++++++++++++++++++++++++----
tools/include/uapi/linux/bpf.h | 6 ++
4 files changed, 206 insertions(+), 22 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9c1674973e03..31ac2c1181f5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1730,6 +1730,27 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
extern int bpf_iter_ ## target(args); \
int __init bpf_iter_ ## target(args) { return 0; }
+/*
+ * The task type of iterators.
+ *
+ * For BPF task iterators, they can be parameterized with various
+ * parameters to visit only some of tasks.
+ *
+ * BPF_TASK_ITER_ALL (default)
+ * Iterate over resources of every task.
+ *
+ * BPF_TASK_ITER_TID
+ * Iterate over resources of a task/tid.
+ *
+ * BPF_TASK_ITER_TGID
+ * Iterate over resources of every task of a process / task group.
+ */
+enum bpf_iter_task_type {
+ BPF_TASK_ITER_ALL = 0,
+ BPF_TASK_ITER_TID,
+ BPF_TASK_ITER_TGID,
+};
+
struct bpf_iter_aux_info {
/* for map_elem iter */
struct bpf_map *map;
@@ -1739,6 +1760,10 @@ struct bpf_iter_aux_info {
struct cgroup *start; /* starting cgroup */
enum bpf_cgroup_iter_order order;
} cgroup;
+ struct {
+ enum bpf_iter_task_type type;
+ u32 pid;
+ } task;
};
typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 962960a98835..f212a19eda06 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -110,6 +110,12 @@ union bpf_iter_link_info {
__u32 cgroup_fd;
__u64 cgroup_id;
} cgroup;
+ /* Parameters of task iterators. */
+ struct {
+ __u32 tid;
+ __u32 pid;
+ __u32 pid_fd;
+ } task;
};
/* BPF syscall commands, see bpf(2) man-page for more details. */
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 8c921799def4..93779a021697 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -12,6 +12,9 @@
struct bpf_iter_seq_task_common {
struct pid_namespace *ns;
+ enum bpf_iter_task_type type;
+ u32 pid;
+ u32 pid_visiting;
};
struct bpf_iter_seq_task_info {
@@ -22,18 +25,114 @@ struct bpf_iter_seq_task_info {
u32 tid;
};
-static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
+static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_common *common,
+ u32 *tid,
+ bool skip_if_dup_files)
+{
+ struct task_struct *task, *next_task;
+ struct pid *pid;
+ u32 saved_tid;
+
+ if (!*tid) {
+ /* The first time, the iterator calls this function. */
+ pid = find_pid_ns(common->pid, common->ns);
+ if (!pid)
+ return NULL;
+
+ task = get_pid_task(pid, PIDTYPE_TGID);
+ if (!task)
+ return NULL;
+
+ *tid = common->pid;
+ common->pid_visiting = common->pid;
+
+ return task;
+ }
+
+ /* The value of *tid hasn't changed since the last time the
+ * iterator called this function.
+ *
+ * A caller will increase *tid by 1 to move to the next task.
+ * In other words, we should return the task of the given
+ * value of *tid if it hasn't changed since the last time
+ * the iterator called this function.
+ */
+ if (*tid == common->pid_visiting) {
+ pid = find_pid_ns(common->pid_visiting, common->ns);
+ task = get_pid_task(pid, PIDTYPE_PID);
+
+ return task;
+ }
As I mentioned in one of my previous replies, the code is correct but
the comment can be improved.
+
+ pid = find_pid_ns(common->pid_visiting, common->ns);
+ if (!pid)
+ return NULL;
+
+ task = get_pid_task(pid, PIDTYPE_PID);
+ if (!task)
+ return NULL;
+
+retry:
+ next_task = next_thread(task);
+ put_task_struct(task);
+ if (!next_task)
+ return NULL;
+
+ saved_tid = *tid;
+ *tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns);
+ if (*tid == common->pid) {
+ /* Run out of tasks of a process. The tasks of a
+ * thread_group are linked as circular linked list.
+ */
+ *tid = saved_tid;
+ return NULL;
+ }
+
+ get_task_struct(next_task);
+ common->pid_visiting = *tid;
+
+ if (skip_if_dup_files && task->files == task->group_leader->files) {
+ task = next_task;
+ goto retry;
+ }
Okay, I double checked and the above change looks good to me.
+
+ return next_task;
+}
+
[...]