On Fri, Nov 01, 2024 at 01:54:52PM +0000, Erin Shepherd wrote: > This enables userspace to use name_to_handle_at to recover a pidfd > to a process. > > We stash the process' PID in the root pid namespace inside the handle, > and use that to recover the pid (validating that pid->ino matches the > value in the handle, i.e. that the pid has not been reused). > > We use the root namespace in order to ensure that file handles can be > moved across namespaces; however, we validate that the PID exists in > the current namespace before returning the inode. > > Signed-off-by: Erin Shepherd <erin.shepherd@xxxxxx> > --- > fs/pidfs.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 43 insertions(+), 7 deletions(-) > > diff --git a/fs/pidfs.c b/fs/pidfs.c > index c8e7e9011550..2d66610ef385 100644 > --- a/fs/pidfs.c > +++ b/fs/pidfs.c > @@ -348,23 +348,59 @@ static const struct dentry_operations pidfs_dentry_operations = { > .d_prune = stashed_dentry_prune, > }; > > -static int pidfs_encode_fh(struct inode *inode, __u32 *fh, int *max_len, > +#define PIDFD_FID_LEN 3 > + > +struct pidfd_fid { > + u64 ino; > + s32 pid; > +} __packed; > + > +static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, > struct inode *parent) > { > struct pid *pid = inode->i_private; > - > - if (*max_len < 2) { > - *max_len = 2; > + struct pidfd_fid *fid = (struct pidfd_fid *)fh; > + > + if (*max_len < PIDFD_FID_LEN) { > + *max_len = PIDFD_FID_LEN; > return FILEID_INVALID; > } > > - *max_len = 2; > - *(u64 *)fh = pid->ino; > - return FILEID_KERNFS; > + fid->ino = pid->ino; > + fid->pid = pid_nr(pid); Hm, a pidfd comes in two flavours: (1) thread-group leader pidfd: pidfd_open(<pid>, 0) (2) thread pidfd: pidfd_open(<pid>, PIDFD_THREAD) In your current scheme fid->pid = pid_nr(pid) means that you always encode a pidfs file handle for a thread pidfd no matter if the provided pidfd was a thread-group leader pidfd or a thread pidfd. 
This is very likely wrong as it means users that use a thread-group pidfd get a thread-specific pid back. I think we need to encode (1) and (2) in the pidfs file handle so users always get back the correct type of pidfd. That very likely means name_to_handle_at() needs to encode this into the pidfs file handle. We need to think a bit about how to do this as we need access to the file so we can tell (1) and (2) apart. It shouldn't be that big of a deal. For pidfds we don't need any path-based lookup anyway. IOW, AT_EMPTY_PATH is the only valid case. Starting with v6.13 we'll have getname_maybe_null() so access to the file is roughly: struct path path; struct filename *fname; unsigned int f_flags = 0; fname = getname_maybe_null(name, flag & AT_EMPTY_PATH); if (fname) { ret = filename_lookup(dfd, fname, lookup_flags, &path, NULL); if (ret) return ret; } else { CLASS(fd, f)(dfd); if (fd_empty(f)) return -EBADF; path = fd_file(f)->f_path; if (pidfd_pid(fd_file(f))) f_flags = fd_file(f)->f_flags; path_get(&path); } and then a thread pidfd is recognizable as f_flags & PIDFD_THREAD/O_EXCL. The question again is how to plumb this through to the export_operations encoding function. > + *max_len = PIDFD_FID_LEN; > + return FILEID_INO64_GEN; > +} > + > +static struct dentry *pidfs_fh_to_dentry(struct super_block *sb, > + struct fid *gen_fid, > + int fh_len, int fh_type) > +{ > + int ret; > + struct path path; > + struct pidfd_fid *fid = (struct pidfd_fid *)gen_fid; > + struct pid *pid; > + > + if (fh_type != FILEID_INO64_GEN || fh_len < PIDFD_FID_LEN) > + return NULL; > + > + pid = find_get_pid_ns(fid->pid, &init_pid_ns); > + if (!pid || pid->ino != fid->ino || pid_vnr(pid) == 0) { > + put_pid(pid); > + return NULL; > + } I think we can avoid the premature reference bump and do: scoped_guard(rcu) { struct pid *pid; pid = find_pid_ns(fid->pid, &init_pid_ns); if (!pid) return NULL; /* Did the pid get recycled? 
*/ if (pid->ino != fid->ino) return NULL; /* Must be resolvable in the caller's pid namespace. */ if (pid_vnr(pid) == 0) return NULL; /* Ok, this is the pid we want. */ get_pid(pid); } > + > + ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path); > + if (ret < 0) > + return ERR_PTR(ret); > + > + mntput(path.mnt); > + return path.dentry; > } > > static const struct export_operations pidfs_export_operations = { > .encode_fh = pidfs_encode_fh, > + .fh_to_dentry = pidfs_fh_to_dentry, > }; > > static int pidfs_init_inode(struct inode *inode, void *data) > -- > 2.46.1 >