Re: [RESEND RFC PATCH 5/5] pidfd_mem: implemented remote memory mapping system call

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Sep 04, 2020 at 02:31:16PM +0300, Adalbert Lazăr wrote:
> From: Mircea Cirjaliu <mcirjaliu@xxxxxxxxxxxxxxx>
> 
> This system call returns 2 fds for inspecting the address space of a
> remote process: one for control and one for access. Use according to
> remote mapping specifications.
> 
> Cc: Christian Brauner <christian@xxxxxxxxxx>
> Signed-off-by: Mircea Cirjaliu <mcirjaliu@xxxxxxxxxxxxxxx>
> Signed-off-by: Adalbert Lazăr <alazar@xxxxxxxxxxxxxxx>
> ---
>  arch/x86/entry/syscalls/syscall_32.tbl |  1 +
>  arch/x86/entry/syscalls/syscall_64.tbl |  1 +
>  include/linux/pid.h                    |  1 +
>  include/linux/syscalls.h               |  1 +
>  include/uapi/asm-generic/unistd.h      |  2 +
>  kernel/exit.c                          |  2 +-
>  kernel/pid.c                           | 55 ++++++++++++++++++++++++++
>  7 files changed, 62 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
> index 54581ac671b4..ca1b5a32dbc5 100644
> --- a/arch/x86/entry/syscalls/syscall_32.tbl
> +++ b/arch/x86/entry/syscalls/syscall_32.tbl
> @@ -440,5 +440,6 @@
>  433	i386	fspick			sys_fspick
>  434	i386	pidfd_open		sys_pidfd_open
>  435	i386	clone3			sys_clone3
> +436     i386    pidfd_mem               sys_pidfd_mem
>  437	i386	openat2			sys_openat2
>  438	i386	pidfd_getfd		sys_pidfd_getfd
> diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
> index 37b844f839bc..6138d3d023f8 100644
> --- a/arch/x86/entry/syscalls/syscall_64.tbl
> +++ b/arch/x86/entry/syscalls/syscall_64.tbl
> @@ -357,6 +357,7 @@
>  433	common	fspick			sys_fspick
>  434	common	pidfd_open		sys_pidfd_open
>  435	common	clone3			sys_clone3
> +436     common  pidfd_mem               sys_pidfd_mem
>  437	common	openat2			sys_openat2
>  438	common	pidfd_getfd		sys_pidfd_getfd
>  
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index cc896f0fc4e3..9ec23ab23fd4 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -76,6 +76,7 @@ extern const struct file_operations pidfd_fops;
>  
>  struct file;
>  
> +extern struct pid *pidfd_get_pid(unsigned int fd);
>  extern struct pid *pidfd_pid(const struct file *file);
>  
>  static inline struct pid *get_pid(struct pid *pid)
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index 1815065d52f3..621f3d52ed4e 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -934,6 +934,7 @@ asmlinkage long sys_clock_adjtime32(clockid_t which_clock,
>  asmlinkage long sys_syncfs(int fd);
>  asmlinkage long sys_setns(int fd, int nstype);
>  asmlinkage long sys_pidfd_open(pid_t pid, unsigned int flags);
> +asmlinkage long sys_pidfd_mem(int pidfd, int __user *fds, unsigned int flags);
>  asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg,
>  			     unsigned int vlen, unsigned flags);
>  asmlinkage long sys_process_vm_readv(pid_t pid,
> diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
> index 3a3201e4618e..2663afc03c86 100644
> --- a/include/uapi/asm-generic/unistd.h
> +++ b/include/uapi/asm-generic/unistd.h
> @@ -850,6 +850,8 @@ __SYSCALL(__NR_pidfd_open, sys_pidfd_open)
>  #define __NR_clone3 435
>  __SYSCALL(__NR_clone3, sys_clone3)
>  #endif
> +#define __NR_pidfd_mem 436
> +__SYSCALL(__NR_pidfd_mem, sys_pidfd_mem)
>  
>  #define __NR_openat2 437
>  __SYSCALL(__NR_openat2, sys_openat2)
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 389a88cb3081..37cd8949e606 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -1464,7 +1464,7 @@ static long do_wait(struct wait_opts *wo)
>  	return retval;
>  }
>  
> -static struct pid *pidfd_get_pid(unsigned int fd)
> +struct pid *pidfd_get_pid(unsigned int fd)
>  {
>  	struct fd f;
>  	struct pid *pid;
> diff --git a/kernel/pid.c b/kernel/pid.c
> index c835b844aca7..c9c49edf4a8a 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -42,6 +42,7 @@
>  #include <linux/sched/signal.h>
>  #include <linux/sched/task.h>
>  #include <linux/idr.h>
> +#include <linux/remote_mapping.h>
>  
>  struct pid init_struct_pid = {
>  	.count		= REFCOUNT_INIT(1),
> @@ -565,6 +566,60 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
>  	return fd;
>  }
>  
> +/**
> + * pidfd_mem() - Allow access to process address space.
> + *
> + * @pidfd: pid file descriptor for the target process
> + * @fds:   array where the control and access file descriptors are returned
> + * @flags: flags to pass
> + *
> + * This creates a pair of file descriptors used to gain access to the
> + * target process memory. The control fd is used to establish a linear
> + * mapping between an offset range and a userspace address range.
> + * The access fd is used to mmap(offset range) on the client side.
> + *
> + * Return: On success, 0 is returned.
> + *         On error, a negative errno number will be returned.
> + */
> +SYSCALL_DEFINE3(pidfd_mem, int, pidfd, int __user *, fds, unsigned int, flags)
> +{
> +	struct pid *pid;
> +	struct task_struct *task;
> +	int ret_fds[2];
> +	int ret;
> +
> +	if (pidfd < 0)
> +		return -EINVAL;

This needs to be EBADF.

> +	if (!fds)
> +		return -EINVAL;

If this API holds up, I think similar to what Florian suggested, a
struct would be nicer for userspace. Sm like:

struct rmemfds {
	s32 memfd1;
	s32 memfd2;
}

and passing it a size

pidfd_mem(int pidfd, struct rmemfd *fds, size_t size, unsigned int, flags)

and then copy_struct_from_user() will take care to do the right thing
for you.

> +	if (flags)
> +		return -EINVAL;
> +
> +	pid = pidfd_get_pid(pidfd);
> +	if (IS_ERR(pid))
> +		return PTR_ERR(pid);
> +
> +	task = get_pid_task(pid, PIDTYPE_PID);
> +	put_pid(pid);
> +	if (IS_ERR(task))
> +		return PTR_ERR(task);
> +
> +	ret = -EPERM;
> +	if (unlikely(task == current) || capable(CAP_SYS_PTRACE))
> +		ret = task_remote_map(task, ret_fds);
> +	put_task_struct(task);
> +	if (IS_ERR_VALUE((long)ret))
> +		return ret;
> +
> +	if (copy_to_user(fds, ret_fds, sizeof(ret_fds))) {
> +		put_unused_fd(ret_fds[0]);
> +		put_unused_fd(ret_fds[1]);
> +		return -EFAULT;
> +	}
> +
> +	return 0;
> +}
> +
>  void __init pid_idr_init(void)
>  {
>  	/* Verify no one has done anything silly: */



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux