On Mon, Dec 16, 2019 at 5:50 PM Christian Brauner <christian.brauner@xxxxxxxxxx> wrote: > > [Cc Arnd since he fiddled with ioctl()s quite a bit recently.] > > > That should be pidfd.h and the resulting new file be placed under the > pidfd entry in maintainers: > +F: include/uapi/linux/pidfd.h > > > > > enum pid_type > > { > > diff --git a/include/uapi/linux/pid.h b/include/uapi/linux/pid.h > > new file mode 100644 > > index 000000000000..4ec02ed8b39a > > --- /dev/null > > +++ b/include/uapi/linux/pid.h > > @@ -0,0 +1,26 @@ > > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ > > +#ifndef _UAPI_LINUX_PID_H > > +#define _UAPI_LINUX_PID_H > > + > > +#include <linux/types.h> > > +#include <linux/ioctl.h> > > + > > +/* options to pass in to pidfd_getfd_args flags */ > > +#define PIDFD_GETFD_CLOEXEC (1 << 0) /* open the fd with cloexec */ > > Please, make them cloexec by default unless there's a very good reason > not to. > For now then, should I have flags, and just say "reserved for future usage", or would you prefer that I drop flags entirely? > > + > > +struct pidfd_getfd_args { > > + __u32 size; /* sizeof(pidfd_getfd_args) */ > > + __u32 fd; /* the tracee's file descriptor to get */ > > + __u32 flags; > > +}; > > I think you want to either want to pad this > > +struct pidfd_getfd_args { > + __u32 size; /* sizeof(pidfd_getfd_args) */ > + __u32 fd; /* the tracee's file descriptor to get */ > + __u32 flags; > __u32 reserved; > +}; > > or use __aligned_u64 everywhere which I'd personally prefer instead of > this manual padding everywhere. > Wouldn't __attribute__((packed)) achieve a similar thing of making sure the struct is a constant size across all compilers? I'll go with __aligned_u64 instead of packed, if you don't want to use packed. > > + > > +#define PIDFD_IOC_MAGIC 'p' > > +#define PIDFD_IO(nr) _IO(PIDFD_IOC_MAGIC, nr) > > +#define PIDFD_IOR(nr, type) _IOR(PIDFD_IOC_MAGIC, nr, type) > > +#define PIDFD_IOW(nr, type) _IOW(PIDFD_IOC_MAGIC, nr, type) > > +#define PIDFD_IOWR(nr, type) _IOWR(PIDFD_IOC_MAGIC, nr, type) > > + > > +#define PIDFD_IOCTL_GETFD PIDFD_IOWR(0xb0, \ > > + struct pidfd_getfd_args) > > + > > +#endif /* _UAPI_LINUX_PID_H */ > > diff --git a/kernel/fork.c b/kernel/fork.c > > index 6cabc124378c..d9971e664e82 100644 > > --- a/kernel/fork.c > > +++ b/kernel/fork.c > > @@ -1726,9 +1726,81 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) > > return poll_flags; > > } > > > > +static long pidfd_getfd(struct pid *pid, struct pidfd_getfd_args __user *buf) > > +{ > > + struct pidfd_getfd_args args; > > + unsigned int fd_flags = 0; > > + struct task_struct *task; > > + struct file *file; > > + u32 user_size; > > + int ret, fd; > > + > > + ret = get_user(user_size, &buf->size); > > + if (ret) > > + return ret; > > + > > + ret = copy_struct_from_user(&args, sizeof(args), buf, user_size); > > + if (ret) > > + return ret; > > + if ((args.flags & ~(PIDFD_GETFD_CLOEXEC)) != 0) > > + return -EINVAL; > > Nit: It's more common - especially in this file - to do > > if (args.flags & ~PIDFD_GETFD_CLOEXEC) > return -EINVAL; > > > + if (args.flags & PIDFD_GETFD_CLOEXEC) > > + fd_flags |= O_CLOEXEC; > > + I'll drop this bit, and just make it CLOEXEC by default. > > + task = get_pid_task(pid, PIDTYPE_PID); > > + if (!task) > > + return -ESRCH; > > \n > > > + ret = -EPERM; > > + if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) > > + goto out; > > \n > > Please don't pre-set errors unless they are used by multiple exit paths. > if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) { > ret = -EPERM; > goto out; > } > > > + ret = -EBADF; > > + file = fget_task(task, args.fd); > > + if (!file) > > + goto out; > > Same. > > > + > > + fd = get_unused_fd_flags(fd_flags); > > + if (fd < 0) { > > + ret = fd; > > + goto out_put_file; > > + } > > \n > > > + /* > > + * security_file_receive must come last since it may have side effects > > + * and cannot be reversed. > > + */ > > + ret = security_file_receive(file); > > + if (ret) > > + goto out_put_fd; > > + > > + fd_install(fd, file); > > + put_task_struct(task); > > + return fd; > > + > > +out_put_fd: > > + put_unused_fd(fd); > > +out_put_file: > > + fput(file); > > +out: > > + put_task_struct(task); > > + return ret; > > +} > > + > > +static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) > > +{ > > + struct pid *pid = file->private_data; > > + void __user *buf = (void __user *)arg; > > + > > + switch (cmd) { > > + case PIDFD_IOCTL_GETFD: > > + return pidfd_getfd(pid, buf); > > + default: > > + return -EINVAL; > > + } > > +} > > + > > const struct file_operations pidfd_fops = { > > .release = pidfd_release, > > .poll = pidfd_poll, > > + .unlocked_ioctl = pidfd_ioctl, > > #ifdef CONFIG_PROC_FS > > .show_fdinfo = pidfd_show_fdinfo, > > #endif > > -- > > 2.20.1 > >