On Fri, Oct 11, 2024 at 4:06 AM Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx> wrote: > > The means by which a pid is determined from a pidfd is duplicated, with > some callers holding a reference to the (pid)fd, and others explicitly > pinning the pid. > > Introduce __pidfd_get_pid() which abstracts both approaches and provide > optional output parameters for file->f_flags and the fd (the latter of > which, if provided, prevents the function from decrementing the fd's > reference count). > > Additionally, allow the ability to open a pidfd by opening a /proc/<pid> > directory, utilised by the pidfd_send_signal() system call, providing a > pidfd_to_pid_proc() helper function to do so. > > Doing this allows us to eliminate open-coded pidfd pid lookup and to > consistently handle this in one place. > > This lays the groundwork for a subsequent patch which adds a new sentinel > pidfd to explicitly reference the current process (i.e. thread group > leader) without the need for a pidfd. > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx> > --- > include/linux/pid.h | 42 +++++++++++++++++++++++++++++++- > kernel/pid.c | 58 ++++++++++++++++++++++++++++++--------------- > kernel/signal.c | 22 ++++------------- > 3 files changed, 84 insertions(+), 38 deletions(-) > > diff --git a/include/linux/pid.h b/include/linux/pid.h > index a3aad9b4074c..68b02eab7509 100644 > --- a/include/linux/pid.h > +++ b/include/linux/pid.h > @@ -2,6 +2,7 @@ > #ifndef _LINUX_PID_H > #define _LINUX_PID_H > > +#include <linux/file.h> > #include <linux/pid_types.h> > #include <linux/rculist.h> > #include <linux/rcupdate.h> > @@ -72,8 +73,47 @@ extern struct pid init_struct_pid; > > struct file; > > + > +/** > + * __pidfd_get_pid() - Retrieve a pid associated with the specified pidfd. > + * > + * @pidfd: The pidfd whose pid we want, or the fd of a /proc/<pid> file if > + * @allow_proc is also set. > + * @pin_pid: If set, then the reference counter of the returned pid is > + * incremented. 
If not set, then @fd should be provided to pin the > + * pidfd. > + * @allow_proc: If set, then an fd of a /proc/<pid> file can be passed instead > + * of a pidfd, and this will be used to determine the pid. > + * @flags: Output variable, if non-NULL, then the file->f_flags of the > + * pidfd will be set here. > + * @fd: Output variable, if non-NULL, then the pidfd reference will > + * remain elevated and the caller will need to decrement it > + * themselves. > + * > + * Returns: If successful, the pid associated with the pidfd, otherwise an > + * error. > + */ > +struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid, > + bool allow_proc, unsigned int *flags, > + struct fd *fd); > + > +static inline struct pid *pidfd_get_pid(unsigned int pidfd, unsigned int *flags) > +{ > + return __pidfd_get_pid(pidfd, /* pin_pid = */ true, > + /* allow_proc = */ false, > + flags, /* fd = */ NULL); > +} > + > +static inline struct pid *pidfd_to_pid_proc(unsigned int pidfd, > + unsigned int *flags, > + struct fd *fd) > +{ > + return __pidfd_get_pid(pidfd, /* pin_pid = */ false, > + /* allow_proc = */ true, > + flags, fd); > +} > + > struct pid *pidfd_pid(const struct file *file); > -struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); > struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); > int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); > void do_notify_pidfd(struct task_struct *task); > diff --git a/kernel/pid.c b/kernel/pid.c > index 2715afb77eab..25cc1c36a1b1 100644 > --- a/kernel/pid.c > +++ b/kernel/pid.c > @@ -36,6 +36,7 @@ > #include <linux/pid_namespace.h> > #include <linux/init_task.h> > #include <linux/syscalls.h> > +#include <linux/proc_fs.h> > #include <linux/proc_ns.h> > #include <linux/refcount.h> > #include <linux/anon_inodes.h> > @@ -534,22 +535,46 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) > } > EXPORT_SYMBOL_GPL(find_ge_pid); > > -struct pid *pidfd_get_pid(unsigned int fd, unsigned int 
*flags) > +struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid, > + bool allow_proc, unsigned int *flags, > + struct fd *fd) > { > - struct fd f; > + struct file *file; > struct pid *pid; > + struct fd f = fdget(pidfd); > > - f = fdget(fd); > - if (!fd_file(f)) > + file = fd_file(f); > + if (!file) > return ERR_PTR(-EBADF); > > - pid = pidfd_pid(fd_file(f)); > - if (!IS_ERR(pid)) { > - get_pid(pid); > - *flags = fd_file(f)->f_flags; > + pid = pidfd_pid(file); > + /* If we allow opening a pidfd via /proc/<pid>, do so. */ > + if (IS_ERR(pid) && allow_proc) > + pid = tgid_pidfd_to_pid(file); > + > + if (IS_ERR(pid)) { > + fdput(f); > + return pid; > } > > - fdput(f); > + if (pin_pid) > + get_pid(pid); > + else > + WARN_ON_ONCE(!fd); /* Nothing to keep pid/pidfd around? */ > + > + if (flags) > + *flags = file->f_flags; > + > + /* > + * If the user provides an fd output then it will handle decrementing > + * its reference counter. > + */ > + if (fd) > + *fd = f; > + else > + /* Otherwise we release it. */ > + fdput(f); > + > return pid; > } There is an EXPORT_SYMBOL_GPL(pidfd_get_pid) right after this line. It should also be changed to EXPORT_SYMBOL_GPL(__pidfd_get_pid); otherwise __pidfd_get_pid() will not be exported. A module calling pidfd_get_pid(), which is now a static inline function in the header file, will end up calling __pidfd_get_pid() and will fail to resolve that symbol. 
> > @@ -747,23 +772,18 @@ SYSCALL_DEFINE3(pidfd_getfd, int, pidfd, int, fd, > unsigned int, flags) > { > struct pid *pid; > - struct fd f; > int ret; > > /* flags is currently unused - make sure it's unset */ > if (flags) > return -EINVAL; > > - f = fdget(pidfd); > - if (!fd_file(f)) > - return -EBADF; > - > - pid = pidfd_pid(fd_file(f)); > + pid = pidfd_get_pid(pidfd, NULL); > if (IS_ERR(pid)) > - ret = PTR_ERR(pid); > - else > - ret = pidfd_getfd(pid, fd); > + return PTR_ERR(pid); > > - fdput(f); > + ret = pidfd_getfd(pid, fd); > + > + put_pid(pid); > return ret; > } > diff --git a/kernel/signal.c b/kernel/signal.c > index 4344860ffcac..868bfa674c62 100644 > --- a/kernel/signal.c > +++ b/kernel/signal.c > @@ -3875,17 +3875,6 @@ static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, > return copy_siginfo_from_user(kinfo, info); > } > > -static struct pid *pidfd_to_pid(const struct file *file) > -{ > - struct pid *pid; > - > - pid = pidfd_pid(file); > - if (!IS_ERR(pid)) > - return pid; > - > - return tgid_pidfd_to_pid(file); > -} > - > #define PIDFD_SEND_SIGNAL_FLAGS \ > (PIDFD_SIGNAL_THREAD | PIDFD_SIGNAL_THREAD_GROUP | \ > PIDFD_SIGNAL_PROCESS_GROUP) > @@ -3908,10 +3897,11 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, > siginfo_t __user *, info, unsigned int, flags) > { > int ret; > - struct fd f; > struct pid *pid; > kernel_siginfo_t kinfo; > enum pid_type type; > + unsigned int f_flags; > + struct fd f; > > /* Enforce flags be set to 0 until we add an extension. */ > if (flags & ~PIDFD_SEND_SIGNAL_FLAGS) > @@ -3921,12 +3911,8 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, > if (hweight32(flags & PIDFD_SEND_SIGNAL_FLAGS) > 1) > return -EINVAL; > > - f = fdget(pidfd); > - if (!fd_file(f)) > - return -EBADF; > - > /* Is this a pidfd? 
*/ > - pid = pidfd_to_pid(fd_file(f)); > + pid = pidfd_to_pid_proc(pidfd, &f_flags, &f); > if (IS_ERR(pid)) { > ret = PTR_ERR(pid); > goto err; > @@ -3939,7 +3925,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, > switch (flags) { > case 0: > /* Infer scope from the type of pidfd. */ > - if (fd_file(f)->f_flags & PIDFD_THREAD) > + if (f_flags & PIDFD_THREAD) > type = PIDTYPE_PID; > else > type = PIDTYPE_TGID; > -- > 2.46.2 >