[patch 21/25] mm/madvise: support both pid and pidfd for process_madvise

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Minchan Kim <minchan@xxxxxxxxxx>
Subject: mm/madvise: support both pid and pidfd for process_madvise

There is a demand[1] to support pid as well pidfd for process_madvise
to reduce unnecessary syscall to get pidfd if the user has control of
the target process (ie, they could guarantee the process is not gone or
pid is not reused).

This patch aims for supporting both options like waitid(2).  So, the
syscall is currently,

        int process_madvise(idtype_t idtype, id_t id, void *addr,
                size_t length, int advice, unsigned long flags);

@which is actually idtype_t for userspace library and currently, it
supports P_PID and P_PIDFD.

[1]  https://lore.kernel.org/linux-mm/9d849087-3359-c4ab-fbec-859e8186c509@xxxxxxxxxxxxx/

Link: http://lkml.kernel.org/r/20200302193630.68771-6-minchan@xxxxxxxxxx
Signed-off-by: Minchan Kim <minchan@xxxxxxxxxx>
Suggested-by: Kirill Tkhai <ktkhai@xxxxxxxxxxxxx>
Reviewed-by: Suren Baghdasaryan <surenb@xxxxxxxxxx>
Reviewed-by: Vlastimil Babka <vbabka@xxxxxxx>
Cc: Christian Brauner <christian@xxxxxxxxxx>
Cc: Alexander Duyck <alexander.h.duyck@xxxxxxxxxxxxxxx>
Cc: Brian Geffon <bgeffon@xxxxxxxxxx>
Cc: Daniel Colascione <dancol@xxxxxxxxxx>
Cc: Jann Horn <jannh@xxxxxxxxxx>
Cc: Jens Axboe <axboe@xxxxxxxxx>
Cc: Joel Fernandes <joel@xxxxxxxxxxxxxxxxx>
Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Cc: John Dias <joaodias@xxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Oleksandr Natalenko <oleksandr@xxxxxxxxxx>
Cc: Sandeep Patil <sspatil@xxxxxxxxxx>
Cc: SeongJae Park <sj38.park@xxxxxxxxx>
Cc: SeongJae Park <sjpark@xxxxxxxxx>
Cc: Shakeel Butt <shakeelb@xxxxxxxxxx>
Cc: Sonny Rao <sonnyrao@xxxxxxxxxx>
Cc: Tim Murray <timmurray@xxxxxxxxxx>
Cc: Christian Brauner <christian.brauner@xxxxxxxxxx>
Cc: <linux-man@xxxxxxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 include/linux/syscalls.h |    3 ++-
 mm/madvise.c             |   36 +++++++++++++++++++++++-------------
 2 files changed, 25 insertions(+), 14 deletions(-)

--- a/include/linux/syscalls.h~mm-support-both-pid-and-pidfd-for-process_madvise
+++ a/include/linux/syscalls.h
@@ -878,7 +878,8 @@ asmlinkage long sys_munlockall(void);
 asmlinkage long sys_mincore(unsigned long start, size_t len,
 				unsigned char __user * vec);
 asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior);
-asmlinkage long sys_process_madvise(int pidfd, unsigned long start,
+
+asmlinkage long sys_process_madvise(int which, pid_t pid, unsigned long start,
 			size_t len, int behavior, unsigned long flags);
 asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 			unsigned long prot, unsigned long pgoff,
--- a/mm/madvise.c~mm-support-both-pid-and-pidfd-for-process_madvise
+++ a/mm/madvise.c
@@ -1208,11 +1208,10 @@ SYSCALL_DEFINE3(madvise, unsigned long,
 	return do_madvise(current, current->mm, start, len_in, behavior);
 }
 
-SYSCALL_DEFINE5(process_madvise, int, pidfd, unsigned long, start,
+SYSCALL_DEFINE6(process_madvise, int, which, pid_t, upid, unsigned long, start,
 		size_t, len_in, int, behavior, unsigned long, flags)
 {
 	int ret;
-	struct fd f;
 	struct pid *pid;
 	struct task_struct *task;
 	struct mm_struct *mm;
@@ -1223,20 +1222,31 @@ SYSCALL_DEFINE5(process_madvise, int, pi
 	if (!process_madvise_behavior_valid(behavior))
 		return -EINVAL;
 
-	f = fdget(pidfd);
-	if (!f.file)
-		return -EBADF;
-
-	pid = pidfd_pid(f.file);
-	if (IS_ERR(pid)) {
-		ret = PTR_ERR(pid);
-		goto fdput;
+	switch (which) {
+	case P_PID:
+		if (upid <= 0)
+			return -EINVAL;
+
+		pid = find_get_pid(upid);
+		if (!pid)
+			return -ESRCH;
+		break;
+	case P_PIDFD:
+		if (upid < 0)
+			return -EINVAL;
+
+		pid = pidfd_get_pid(upid);
+		if (IS_ERR(pid))
+			return PTR_ERR(pid);
+		break;
+	default:
+		return -EINVAL;
 	}
 
 	task = get_pid_task(pid, PIDTYPE_PID);
 	if (!task) {
 		ret = -ESRCH;
-		goto fdput;
+		goto put_pid;
 	}
 
 	mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
@@ -1249,7 +1259,7 @@ SYSCALL_DEFINE5(process_madvise, int, pi
 	mmput(mm);
 release_task:
 	put_task_struct(task);
-fdput:
-	fdput(f);
+put_pid:
+	put_pid(pid);
 	return ret;
 }
_



[Index of Archives]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux