On Fri, Nov 29, 2024 at 2:39 PM Christian Brauner <brauner@xxxxxxxxxx> wrote: > > On 64-bit platforms, userspace can read the pidfd's inode in order to > get a never-repeated PID identifier. On 32-bit platforms this identifier > is not exposed, as inodes are limited to 32 bits. Instead expose the > identifier via export_fh, which makes it available to userspace via > name_to_handle_at. > > In addition we implement fh_to_dentry, which allows userspace to > recover a pidfd from a pidfs file handle. > > Signed-off-by: Erin Shepherd <erin.shepherd@xxxxxx> > [brauner: patch heavily rewritten] > Co-Developed-by: Christian Brauner <brauner@xxxxxxxxxx> > Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx> > --- > fs/fhandle.c | 34 +++++++++++---------- > fs/pidfs.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 115 insertions(+), 15 deletions(-) > > diff --git a/fs/fhandle.c b/fs/fhandle.c > index 23491094032ec037066a271873ea8ff794616bee..4c847ca16fabe31d51ff5698b0c9c355c3e2fb67 100644 > --- a/fs/fhandle.c > +++ b/fs/fhandle.c > @@ -268,20 +268,6 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path, > return 0; > } > > -/* > - * Allow relaxed permissions of file handles if the caller has the > - * ability to mount the filesystem or create a bind-mount of the > - * provided @mountdirfd. > - * > - * In both cases the caller may be able to get an unobstructed way to > - * the encoded file handle. If the caller is only able to create a > - * bind-mount we need to verify that there are no locked mounts on top > - * of it that could prevent us from getting to the encoded file. > - * > - * In principle, locked mounts can prevent the caller from mounting the > - * filesystem but that only applies to procfs and sysfs neither of which > - * support decoding file handles. > - */ > static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, > unsigned int o_flags) > { > @@ -291,6 +277,19 @@ static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, > return true; > > /* > + * Allow relaxed permissions of file handles if the caller has the > + * ability to mount the filesystem or create a bind-mount of the > + * provided @mountdirfd. > + * > + * In both cases the caller may be able to get an unobstructed way to > + * the encoded file handle. If the caller is only able to create a > + * bind-mount we need to verify that there are no locked mounts on top > + * of it that could prevent us from getting to the encoded file. > + * > + * In principle, locked mounts can prevent the caller from mounting the > + * filesystem but that only applies to procfs and sysfs neither of which > + * support decoding file handles. > + * Belongs in patch 4 > * Restrict to O_DIRECTORY to provide a deterministic API that avoids a > * confusing api in the face of disconnected non-dir dentries. > * > @@ -397,6 +396,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, > long retval = 0; > struct path path __free(path_put) = {}; > struct file *file; > + const struct export_operations *eops; > > retval = handle_to_path(mountdirfd, ufh, &path, open_flag); > if (retval) > @@ -406,7 +406,11 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, > if (fd < 0) > return fd; > > - file = file_open_root(&path, "", open_flag, 0); > + eops = path.mnt->mnt_sb->s_export_op; > + if (eops->open) > + file = eops->open(&path, open_flag); > + else > + file = file_open_root(&path, "", open_flag, 0); Belongs in patch 3 > if (IS_ERR(file)) > return PTR_ERR(file); > > diff --git a/fs/pidfs.c b/fs/pidfs.c > index f73a47e1d8379df886a90a044fb887f8d06f7c0b..f09af08a4abe4a9100ed972bee8f5c5d7ab33d84 100644 > --- a/fs/pidfs.c > +++ b/fs/pidfs.c > @@ -1,5 +1,6 @@ > // SPDX-License-Identifier: GPL-2.0 > #include <linux/anon_inodes.h> > +#include <linux/exportfs.h> > #include <linux/file.h> > #include <linux/fs.h> > #include <linux/cgroup.h> > @@ -454,6 +455,100 @@ static const struct dentry_operations pidfs_dentry_operations = { > .d_prune = stashed_dentry_prune, > }; > > +static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, > + struct inode *parent) > +{ > + struct pid *pid = inode->i_private; > + > + if (*max_len < 2) { > + *max_len = 2; > + return FILEID_INVALID; > + } > + > + *max_len = 2; > + *(u64 *)fh = pid->ino; > + return FILEID_KERNFS; > +} > + > +/* Find a struct pid based on the inode number. */ > +static struct pid *pidfs_ino_get_pid(u64 ino) > +{ > + ino_t pid_ino = pidfs_ino(ino); > + u32 gen = pidfs_gen(ino); > + struct pid *pid; > + > + guard(rcu)(); > + > + /* Handle @pid lookup carefully so there's no risk of UAF. */ > + pid = idr_find(&pidfs_ino_idr, (u32)pid_ino); > + if (!pid) > + return NULL; > + > + if (sizeof(ino_t) < sizeof(u64)) { Not sure why the two cases are needed. Isn't this enough? if (pidfs_ino(pid->ino) != pid_ino || pidfs_gen(pid->ino) != gen) pid = NULL; > + if (gen && pidfs_gen(pid->ino) != gen) > + pid = NULL; > + } else { > + if (pidfs_ino(pid->ino) != pid_ino) > + pid = NULL; > + } > + > + /* Within our pid namespace hierarchy? */ > + if (pid_vnr(pid) == 0) > + pid = NULL; > + > + return get_pid(pid); > +} > + > +static struct dentry *pidfs_fh_to_dentry(struct super_block *sb, > + struct fid *fid, int fh_len, > + int fh_type) > +{ > + int ret; > + u64 pid_ino; > + struct path path; > + struct pid *pid; > + > + if (fh_len < 2) > + return NULL; > + > + switch (fh_type) { > + case FILEID_KERNFS: > + pid_ino = *(u64 *)fid; > + break; > + default: > + return NULL; > + } > + > + pid = pidfs_ino_get_pid(pid_ino); > + if (!pid) > + return NULL; > + > + ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path); > + if (ret < 0) > + return ERR_PTR(ret); > + > + mntput(path.mnt); > + return path.dentry; > +} > + > +static int pidfs_export_permission(struct handle_to_path_ctx *ctx, > + unsigned int oflags) > +{ This deserves a comment to explain why no permissions are required. > + return 0; > +} > + > +static struct file *pidfs_export_open(struct path *path, unsigned int oflags) > +{ > + return dentry_open(path, oflags | O_RDWR, current_cred()); Why is O_RDWR needed here? perhaps a comment to explain. Thanks, Amir.