This adds an ioctl which allows file descriptors to be extracted from processes based on their pidfd. One reason to use this is to allow sandboxers to take actions on file descriptors on behalf of another process. For example, this can be combined with seccomp-bpf's user notification to do on-demand fd extraction and take privileged actions, such as binding a socket to a privileged port. This is similar to using ptrace-based parasitic code injection to extract a file descriptor from a process, but without breaking debuggers or paying the ptrace overhead cost. You must have the ability to ptrace the process in order to extract any file descriptors from it. ptrace can already be used to extract file descriptors via parasitic code injection, so the permissions model is aligned. The ioctl takes a pointer to a struct pidfd_getfd_args. pidfd_getfd_args contains a size field, which allows for gradual evolution of the API. There is a flags field, which can be used to state whether the fd should be opened with CLOEXEC (PIDFD_GETFD_CLOEXEC) or not. Additional flags may be added in the future, for example to allow clearing cgroup information about the file descriptor. If the structure passed in by userspace is larger than the structure known to this kernel version (for example, because userspace was built against newer headers), and any of the extra trailing bytes are non-zero, E2BIG will be returned.
Signed-off-by: Sargun Dhillon <sargun@xxxxxxxxx> --- Documentation/ioctl/ioctl-number.rst | 1 + include/linux/pid.h | 1 + include/uapi/linux/pid.h | 26 ++++++++++ kernel/fork.c | 72 ++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+) create mode 100644 include/uapi/linux/pid.h diff --git a/Documentation/ioctl/ioctl-number.rst b/Documentation/ioctl/ioctl-number.rst index bef79cd4c6b4..be2efb93acd1 100644 --- a/Documentation/ioctl/ioctl-number.rst +++ b/Documentation/ioctl/ioctl-number.rst @@ -272,6 +272,7 @@ Code Seq# Include File Comments <mailto:tim@xxxxxxxxxxxx> 'p' A1-A5 linux/pps.h LinuxPPS <mailto:giometti@xxxxxxxx> +'p' B0-CF uapi/linux/pid.h 'q' 00-1F linux/serio.h 'q' 80-FF linux/telephony.h Internet PhoneJACK, Internet LineJACK linux/ixjuser.h <http://web.archive.org/web/%2A/http://www.quicknet.net> diff --git a/include/linux/pid.h b/include/linux/pid.h index 9645b1194c98..65f1a73040c9 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -5,6 +5,7 @@ #include <linux/rculist.h> #include <linux/wait.h> #include <linux/refcount.h> +#include <uapi/linux/pid.h> enum pid_type { diff --git a/include/uapi/linux/pid.h b/include/uapi/linux/pid.h new file mode 100644 index 000000000000..4ec02ed8b39a --- /dev/null +++ b/include/uapi/linux/pid.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_PID_H +#define _UAPI_LINUX_PID_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +/* options to pass in to pidfd_getfd_args flags */ +#define PIDFD_GETFD_CLOEXEC (1 << 0) /* open the fd with cloexec */ + +struct pidfd_getfd_args { + __u32 size; /* sizeof(pidfd_getfd_args) */ + __u32 fd; /* the tracee's file descriptor to get */ + __u32 flags; +}; + +#define PIDFD_IOC_MAGIC 'p' +#define PIDFD_IO(nr) _IO(PIDFD_IOC_MAGIC, nr) +#define PIDFD_IOR(nr, type) _IOR(PIDFD_IOC_MAGIC, nr, type) +#define PIDFD_IOW(nr, type) _IOW(PIDFD_IOC_MAGIC, nr, type) +#define PIDFD_IOWR(nr, type) _IOWR(PIDFD_IOC_MAGIC, nr, 
type) + +#define PIDFD_IOCTL_GETFD PIDFD_IOWR(0xb0, \ + struct pidfd_getfd_args) + +#endif /* _UAPI_LINUX_PID_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 6cabc124378c..d9971e664e82 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1726,9 +1726,81 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) return poll_flags; } +static long pidfd_getfd(struct pid *pid, struct pidfd_getfd_args __user *buf) +{ + struct pidfd_getfd_args args; + unsigned int fd_flags = 0; + struct task_struct *task; + struct file *file; + u32 user_size; + int ret, fd; + + ret = get_user(user_size, &buf->size); + if (ret) + return ret; + + ret = copy_struct_from_user(&args, sizeof(args), buf, user_size); + if (ret) + return ret; + if ((args.flags & ~(PIDFD_GETFD_CLOEXEC)) != 0) + return -EINVAL; + if (args.flags & PIDFD_GETFD_CLOEXEC) + fd_flags |= O_CLOEXEC; + + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) + return -ESRCH; + ret = -EPERM; + if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) + goto out; + ret = -EBADF; + file = fget_task(task, args.fd); + if (!file) + goto out; + + fd = get_unused_fd_flags(fd_flags); + if (fd < 0) { + ret = fd; + goto out_put_file; + } + /* + * security_file_receive must come last since it may have side effects + * and cannot be reversed. 
+ */ + ret = security_file_receive(file); + if (ret) + goto out_put_fd; + + fd_install(fd, file); + put_task_struct(task); + return fd; + +out_put_fd: + put_unused_fd(fd); +out_put_file: + fput(file); +out: + put_task_struct(task); + return ret; +} + +static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct pid *pid = file->private_data; + void __user *buf = (void __user *)arg; + + switch (cmd) { + case PIDFD_IOCTL_GETFD: + return pidfd_getfd(pid, buf); + default: + return -EINVAL; + } +} + const struct file_operations pidfd_fops = { .release = pidfd_release, .poll = pidfd_poll, + .unlocked_ioctl = pidfd_ioctl, #ifdef CONFIG_PROC_FS .show_fdinfo = pidfd_show_fdinfo, #endif -- 2.20.1 _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/containers