Re: [PATCH RFC 6/6] pidfs: implement file handle support

Amir Goldstein <amir73il@xxxxxxxxx> · Fri, 29 Nov 2024 15:52:44 +0100

On Fri, Nov 29, 2024 at 2:39 PM Christian Brauner <brauner@xxxxxxxxxx> wrote:
>
> On 64-bit platforms, userspace can read the pidfd's inode in order to
> get a never-repeated PID identifier. On 32-bit platforms this identifier
> is not exposed, as inodes are limited to 32 bits. Instead expose the
> identifier via export_fh, which makes it available to userspace via
> name_to_handle_at.
>
> In addition we implement fh_to_dentry, which allows userspace to
> recover a pidfd from a pidfs file handle.
>
> Signed-off-by: Erin Shepherd <erin.shepherd@xxxxxx>
> [brauner: patch heavily rewritten]
> Co-Developed-by: Christian Brauner <brauner@xxxxxxxxxx>
> Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>
> ---
>  fs/fhandle.c | 34 +++++++++++----------
>  fs/pidfs.c   | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 115 insertions(+), 15 deletions(-)
>
> diff --git a/fs/fhandle.c b/fs/fhandle.c
> index 23491094032ec037066a271873ea8ff794616bee..4c847ca16fabe31d51ff5698b0c9c355c3e2fb67 100644
> --- a/fs/fhandle.c
> +++ b/fs/fhandle.c
> @@ -268,20 +268,6 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path,
>         return 0;
>  }
>
> -/*
> - * Allow relaxed permissions of file handles if the caller has the
> - * ability to mount the filesystem or create a bind-mount of the
> - * provided @mountdirfd.
> - *
> - * In both cases the caller may be able to get an unobstructed way to
> - * the encoded file handle. If the caller is only able to create a
> - * bind-mount we need to verify that there are no locked mounts on top
> - * of it that could prevent us from getting to the encoded file.
> - *
> - * In principle, locked mounts can prevent the caller from mounting the
> - * filesystem but that only applies to procfs and sysfs neither of which
> - * support decoding file handles.
> - */
>  static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
>                                  unsigned int o_flags)
>  {
> @@ -291,6 +277,19 @@ static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
>                 return true;
>
>         /*
> +        * Allow relaxed permissions of file handles if the caller has the
> +        * ability to mount the filesystem or create a bind-mount of the
> +        * provided @mountdirfd.
> +        *
> +        * In both cases the caller may be able to get an unobstructed way to
> +        * the encoded file handle. If the caller is only able to create a
> +        * bind-mount we need to verify that there are no locked mounts on top
> +        * of it that could prevent us from getting to the encoded file.
> +        *
> +        * In principle, locked mounts can prevent the caller from mounting the
> +        * filesystem but that only applies to procfs and sysfs neither of which
> +        * support decoding file handles.
> +        *

Belongs in patch 4

>          * Restrict to O_DIRECTORY to provide a deterministic API that avoids a
>          * confusing api in the face of disconnected non-dir dentries.
>          *
> @@ -397,6 +396,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
>         long retval = 0;
>         struct path path __free(path_put) = {};
>         struct file *file;
> +       const struct export_operations *eops;
>
>         retval = handle_to_path(mountdirfd, ufh, &path, open_flag);
>         if (retval)
> @@ -406,7 +406,11 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
>         if (fd < 0)
>                 return fd;
>
> -       file = file_open_root(&path, "", open_flag, 0);
> +       eops = path.mnt->mnt_sb->s_export_op;
> +       if (eops->open)
> +               file = eops->open(&path, open_flag);
> +       else
> +               file = file_open_root(&path, "", open_flag, 0);

Belongs in patch 3

>         if (IS_ERR(file))
>                 return PTR_ERR(file);
>
> diff --git a/fs/pidfs.c b/fs/pidfs.c
> index f73a47e1d8379df886a90a044fb887f8d06f7c0b..f09af08a4abe4a9100ed972bee8f5c5d7ab33d84 100644
> --- a/fs/pidfs.c
> +++ b/fs/pidfs.c
> @@ -1,5 +1,6 @@
>  // SPDX-License-Identifier: GPL-2.0
>  #include <linux/anon_inodes.h>
> +#include <linux/exportfs.h>
>  #include <linux/file.h>
>  #include <linux/fs.h>
>  #include <linux/cgroup.h>
> @@ -454,6 +455,100 @@ static const struct dentry_operations pidfs_dentry_operations = {
>         .d_prune        = stashed_dentry_prune,
>  };
>
> +static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
> +                          struct inode *parent)
> +{
> +       struct pid *pid = inode->i_private;
> +
> +       if (*max_len < 2) {
> +               *max_len = 2;
> +               return FILEID_INVALID;
> +       }
> +
> +       *max_len = 2;
> +       *(u64 *)fh = pid->ino;
> +       return FILEID_KERNFS;
> +}
> +
> +/* Find a struct pid based on the inode number. */
> +static struct pid *pidfs_ino_get_pid(u64 ino)
> +{
> +       ino_t pid_ino = pidfs_ino(ino);
> +       u32 gen = pidfs_gen(ino);
> +       struct pid *pid;
> +
> +       guard(rcu)();
> +
> +       /* Handle @pid lookup carefully so there's no risk of UAF. */
> +       pid = idr_find(&pidfs_ino_idr, (u32)pid_ino);
> +       if (!pid)
> +               return NULL;
> +
> +       if (sizeof(ino_t) < sizeof(u64)) {

Not sure why the two cases are needed. Isn't this enough?

  if (pidfs_ino(pid->ino) != pid_ino || pidfs_gen(pid->ino) != gen)
         pid = NULL;

> +               if (gen && pidfs_gen(pid->ino) != gen)
> +                       pid = NULL;
> +       } else {
> +               if (pidfs_ino(pid->ino) != pid_ino)
> +                       pid = NULL;
> +       }
> +
> +       /* Within our pid namespace hierarchy? */
> +       if (pid_vnr(pid) == 0)
> +               pid = NULL;
> +
> +       return get_pid(pid);
> +}
> +
> +static struct dentry *pidfs_fh_to_dentry(struct super_block *sb,
> +                                        struct fid *fid, int fh_len,
> +                                        int fh_type)
> +{
> +       int ret;
> +       u64 pid_ino;
> +       struct path path;
> +       struct pid *pid;
> +
> +       if (fh_len < 2)
> +               return NULL;
> +
> +       switch (fh_type) {
> +       case FILEID_KERNFS:
> +               pid_ino = *(u64 *)fid;
> +               break;
> +       default:
> +               return NULL;
> +       }
> +
> +       pid = pidfs_ino_get_pid(pid_ino);
> +       if (!pid)
> +               return NULL;
> +
> +       ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path);
> +       if (ret < 0)
> +               return ERR_PTR(ret);
> +
> +       mntput(path.mnt);
> +       return path.dentry;
> +}
> +
> +static int pidfs_export_permission(struct handle_to_path_ctx *ctx,
> +                                  unsigned int oflags)
> +{

This deserves a comment to explain why no permissions are required.

> +       return 0;
> +}
> +
> +static struct file *pidfs_export_open(struct path *path, unsigned int oflags)
> +{
> +       return dentry_open(path, oflags | O_RDWR, current_cred());

Why is O_RDWR needed here? perhaps a comment to explain.

Thanks,
Amir.