Re: [PATCH bpf-next v5 1/3] bpf: Parameterize task iterators.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 8/10/22 5:16 PM, Kui-Feng Lee wrote:
Allow creating an iterator that loops through resources of one task/thread.

People could only create iterators to loop through all resources of
files, vma, and tasks in the system, even though they were interested
in only the resources of a specific task or process.  Passing the
additional parameters, people can now create an iterator to go
through all resources or only the resources of a task.

Signed-off-by: Kui-Feng Lee <kuifeng@xxxxxx>
---
  include/linux/bpf.h            |  29 ++++++++
  include/uapi/linux/bpf.h       |   8 +++
  kernel/bpf/task_iter.c         | 126 ++++++++++++++++++++++++++-------
  tools/include/uapi/linux/bpf.h |   8 +++
  4 files changed, 147 insertions(+), 24 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 11950029284f..6bbe53d06faa 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1716,8 +1716,37 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
  	extern int bpf_iter_ ## target(args);			\
  	int __init bpf_iter_ ## target(args) { return 0; }
+/*
+ * The task type of iterators.
+ *
+ * For BPF task iterators, they can be parameterized with various
+ * parameters to visit only some of tasks.
+ *
+ * BPF_TASK_ITER_ALL (default)
+ *	Iterate over resources of every task.
+ *
+ * BPF_TASK_ITER_TID
+ *	Iterate over resources of a task/tid.
+ *
+ * BPF_TASK_ITER_TGID
+ *	Iterate over reosurces of evevry task of a process / task group.
+ */
+enum bpf_iter_task_type {
+	BPF_TASK_ITER_ALL = 0,
+	BPF_TASK_ITER_TID,
+	BPF_TASK_ITER_TGID,
+};
+
  struct bpf_iter_aux_info {
  	struct bpf_map *map;
+	struct {
+		enum bpf_iter_task_type	type;
+		union {
+			u32 tid;
+			u32 tgid;
+			u32 pid_fd;
+		};
+	} task;
  };
typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ffcbf79a556b..6328aca0cf5c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -91,6 +91,14 @@ union bpf_iter_link_info {
  	struct {
  		__u32	map_fd;
  	} map;
+	/*
+	 * Parameters of task iterators.
+	 */

The comment can be put into one line.

+	struct {
+		__u32	tid;
+		__u32	tgid;
+		__u32	pid_fd;

The above is a mix of kernel and user space terminologies.
tid/pid are user space concepts and tgid is a kernel space
concept.

In bpf uapi header, we have

struct bpf_pidns_info {
        __u32 pid;
        __u32 tgid;
};

which uses kernel terminologies.

So I suggest the bpf_iter_link_info.task can also
use pure kernel terminology pid/tgid/tgid_fd here.

Alternatively, pure user space terminology could be used
(tid/pid/pid_fd), but the kernel terminology seems
better since we already have a precedent.


+	} task;
  };
/* BPF syscall commands, see bpf(2) man-page for more details. */
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 8c921799def4..f2e21efe075d 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -12,6 +12,12 @@
struct bpf_iter_seq_task_common {
  	struct pid_namespace *ns;
+	enum bpf_iter_task_type	type;
+	union {
+		u32 tid;
+		u32 tgid;
+		u32 pid_fd;
+	};
  };
struct bpf_iter_seq_task_info {
@@ -22,24 +28,40 @@ struct bpf_iter_seq_task_info {
  	u32 tid;
  };
-static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
+static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
  					     u32 *tid,
  					     bool skip_if_dup_files)
  {
  	struct task_struct *task = NULL;
  	struct pid *pid;
+ if (common->type == BPF_TASK_ITER_TID) {
+		if (*tid && *tid != common->tid)
+			return NULL;
+		rcu_read_lock();
+		pid = find_pid_ns(common->tid, common->ns);
+		if (pid) {
+			task = get_pid_task(pid, PIDTYPE_PID);
+			*tid = common->tid;
+		}
+		rcu_read_unlock();
+		return task;
+	}
+
  	rcu_read_lock();
  retry:
-	pid = find_ge_pid(*tid, ns);
+	pid = find_ge_pid(*tid, common->ns);
  	if (pid) {
-		*tid = pid_nr_ns(pid, ns);
+		*tid = pid_nr_ns(pid, common->ns);
  		task = get_pid_task(pid, PIDTYPE_PID);
+

This extra line is unnecessary.

  		if (!task) {
  			++*tid;
  			goto retry;
-		} else if (skip_if_dup_files && !thread_group_leader(task) &&
-			   task->files == task->group_leader->files) {
+		} else if ((skip_if_dup_files && !thread_group_leader(task) &&
+			    task->files == task->group_leader->files) ||
+			   (common->type == BPF_TASK_ITER_TGID &&
+			    __task_pid_nr_ns(task, PIDTYPE_TGID, common->ns) != common->tgid)) {
  			put_task_struct(task);
  			task = NULL;
  			++*tid;
@@ -56,7 +78,8 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos)
  	struct bpf_iter_seq_task_info *info = seq->private;
  	struct task_struct *task;
- task = task_seq_get_next(info->common.ns, &info->tid, false);
+	task = task_seq_get_next(&info->common, &info->tid, false);
+

Extra line?

  	if (!task)
  		return NULL;
@@ -73,7 +96,8 @@ static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
  	++*pos;
  	++info->tid;
  	put_task_struct((struct task_struct *)v);
-	task = task_seq_get_next(info->common.ns, &info->tid, false);
+

Extra line?

+	task = task_seq_get_next(&info->common, &info->tid, false);
  	if (!task)
  		return NULL;
@@ -117,6 +141,43 @@ static void task_seq_stop(struct seq_file *seq, void *v)
  		put_task_struct((struct task_struct *)v);
  }
+static int bpf_iter_attach_task(struct bpf_prog *prog,
+				union bpf_iter_link_info *linfo,
+				struct bpf_iter_aux_info *aux)
+{
+	unsigned int flags;
+	struct pid_namespace *ns;
+	struct pid *pid;
+	pid_t tgid;

Follow reverse Christmas tree style?

+
+	if (linfo->task.tid != 0) {
+		aux->task.type = BPF_TASK_ITER_TID;
+		aux->task.tid = linfo->task.tid;
+	} else if (linfo->task.tgid != 0) {
+		aux->task.type = BPF_TASK_ITER_TGID;
+		aux->task.tgid = linfo->task.tgid;
+	} else if (linfo->task.pid_fd != 0) {
+		aux->task.type = BPF_TASK_ITER_TGID;
+		pid = pidfd_get_pid(linfo->task.pid_fd, &flags);
+		if (IS_ERR(pid))
+			return PTR_ERR(pid);
+
+		ns = task_active_pid_ns(current);
+		if (IS_ERR(ns))
+			return PTR_ERR(ns);
+
+		tgid = pid_nr_ns(pid, ns);
+		if (tgid <= 0)
+			return -EINVAL;

Is it possible that tgid <= 0? I think not, so
the above two lines are unnecessary.

+
+		aux->task.tgid = tgid;

We leak the reference count for 'pid' here.
We need to add
		put_pid(pid);
to release the reference for pid.
	
+	} else {
+		aux->task.type = BPF_TASK_ITER_ALL;
+	}

What will happen if two or all of task.tid, task.tgid and
task.pid_fd are non-zero? Should we fail here?

+
+	return 0;
+}
+
  static const struct seq_operations task_seq_ops = {
  	.start	= task_seq_start,
  	.next	= task_seq_next,
@@ -137,8 +198,7 @@ struct bpf_iter_seq_task_file_info {
  static struct file *
[...]
@@ -307,11 +381,10 @@ enum bpf_task_vma_iter_find_op {
  static struct vm_area_struct *
  task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
  {
-	struct pid_namespace *ns = info->common.ns;
  	enum bpf_task_vma_iter_find_op op;
  	struct vm_area_struct *curr_vma;
  	struct task_struct *curr_task;
-	u32 curr_tid = info->tid;
+	u32 saved_tid = info->tid;
/* If this function returns a non-NULL vma, it holds a reference to
  	 * the task_struct, and holds read lock on vma->mm->mmap_lock.
@@ -371,14 +444,13 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
  		}
  	} else {
  again:
-		curr_task = task_seq_get_next(ns, &curr_tid, true);
+		curr_task = task_seq_get_next(&info->common, &info->tid, true);
  		if (!curr_task) {
-			info->tid = curr_tid + 1;
+			info->tid++;
  			goto finish;
  		}
- if (curr_tid != info->tid) {
-			info->tid = curr_tid;
+		if (saved_tid != info->tid) {
  			/* new task, process the first vma */
  			op = task_vma_iter_first_vma;
  		} else {
@@ -430,9 +502,12 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
  	return curr_vma;
next_task:
+	if (info->common.type == BPF_TASK_ITER_TID)
+		goto finish;
+
  	put_task_struct(curr_task);
  	info->task = NULL;
-	curr_tid++;
+	info->tid++;

saved_tid = ++info->tid?

  	goto again;
finish:
[...]



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux