Re: [PATCH v13 02/10] fuse: Definitions and ioctl for passthrough

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, May 19, 2023 at 6:13 PM Miklos Szeredi <miklos@xxxxxxxxxx> wrote:
>
> On Fri, 19 May 2023 at 14:57, Amir Goldstein <amir73il@xxxxxxxxx> wrote:
> >
> > From: Alessio Balsini <balsini@xxxxxxxxxxx>
> >
> > Expose the FUSE_PASSTHROUGH capability to user space and declare all the
> > basic data structures and functions as the skeleton on top of which the
> > FUSE passthrough functionality will be built.
> >
> > As part of this, introduce the new FUSE passthrough ioctl, which allows
> > the FUSE daemon to specify a direct connection between a FUSE file and a
> > backing file.  The ioctl requires user space to pass the file descriptor
> > of one of its opened files to the FUSE driver and get an id in return.
> > This id may be passed in a reply to OPEN with flag FOPEN_PASSTHROUGH
> > to setup passthrough of read/write operations on the open file.
> >
> > Also, add the passthrough functions for the set-up and tear-down of the
> > data structures and locks that will be used both when fuse_conns and
> > fuse_files are created/deleted.
> >
> > Signed-off-by: Alessio Balsini <balsini@xxxxxxxxxxx>
> > Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx>
> > ---
> >  fs/fuse/Makefile          |  1 +
> >  fs/fuse/dev.c             | 33 ++++++++++++++++++++++++--------
> >  fs/fuse/dir.c             |  7 ++++++-
> >  fs/fuse/file.c            | 17 +++++++++++++----
> >  fs/fuse/fuse_i.h          | 27 ++++++++++++++++++++++++++
> >  fs/fuse/inode.c           | 21 +++++++++++++++++++-
> >  fs/fuse/passthrough.c     | 40 +++++++++++++++++++++++++++++++++++++++
> >  include/uapi/linux/fuse.h | 13 +++++++++++--
> >  8 files changed, 143 insertions(+), 16 deletions(-)
> >  create mode 100644 fs/fuse/passthrough.c
> >
> > diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
> > index 0c48b35c058d..d9e1b47382f3 100644
> > --- a/fs/fuse/Makefile
> > +++ b/fs/fuse/Makefile
> > @@ -8,6 +8,7 @@ obj-$(CONFIG_CUSE) += cuse.o
> >  obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
> >
> >  fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
> > +fuse-y += passthrough.o
> >  fuse-$(CONFIG_FUSE_DAX) += dax.o
> >
> >  virtiofs-y := virtio_fs.o
> > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> > index 1a8f82f478cb..cb00234e7843 100644
> > --- a/fs/fuse/dev.c
> > +++ b/fs/fuse/dev.c
> > @@ -2255,16 +2255,19 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
> >                            unsigned long arg)
> >  {
> >         int res;
> > -       int oldfd;
> > -       struct fuse_dev *fud = NULL;
> > +       int fd, id;
> > +       struct fuse_dev *fud = fuse_get_dev(file);
>
> This is broken, see below.

IIUC, *this* is not broken for the new ioctls...

>
> >         struct fd f;
> >
> > +       if (!fud)
> > +               return -EINVAL;
> > +
> >         switch (cmd) {
> >         case FUSE_DEV_IOC_CLONE:
> > -               if (get_user(oldfd, (__u32 __user *)arg))
> > +               if (get_user(fd, (__u32 __user *)arg))
> >                         return -EFAULT;
> >
> > -               f = fdget(oldfd);
> > +               f = fdget(fd);
> >                 if (!f.file)
> >                         return -EINVAL;
> >
> > @@ -2272,17 +2275,31 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
> >                  * Check against file->f_op because CUSE
> >                  * uses the same ioctl handler.
> >                  */
> > -               if (f.file->f_op == file->f_op)
> > -                       fud = fuse_get_dev(f.file);
> > -
> >                 res = -EINVAL;
> > -               if (fud) {
> > +               if (f.file->f_op == file->f_op) {

and this can be fixed by adding:
 +                           fud = fuse_get_dev(f.file);

> >                         mutex_lock(&fuse_mutex);
> >                         res = fuse_device_clone(fud->fc, file);
>
> We are cloning f.file into file not the other way round.  So fud must
> come from f.file.
>
>
> >                         mutex_unlock(&fuse_mutex);
> >                 }
> >                 fdput(f);
> >                 break;
> > +       case FUSE_DEV_IOC_PASSTHROUGH_OPEN:
> > +               if (get_user(fd, (__u32 __user *)arg))
> > +                       return -EFAULT;
> > +
> > +               f = fdget(fd);
> > +               if (!f.file)
> > +                       return -EINVAL;
> > +
> > +               res = fuse_passthrough_open(fud->fc, fd);
> > +               fdput(f);
> > +               break;
> > +       case FUSE_DEV_IOC_PASSTHROUGH_CLOSE:
> > +               if (get_user(id, (__u32 __user *)arg))
> > +                       return -EFAULT;
> > +
> > +               res = fuse_passthrough_close(fud->fc, id);
> > +               break;
> >         default:
> >                 res = -ENOTTY;
> >                 break;
> > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> > index 35bc174f9ba2..1894298e7f7a 100644
> > --- a/fs/fuse/dir.c
> > +++ b/fs/fuse/dir.c
> > @@ -619,6 +619,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
> >  {
> >         int err;
> >         struct inode *inode;
> > +       struct fuse_conn *fc = get_fuse_conn(dir);
> >         struct fuse_mount *fm = get_fuse_mount(dir);
> >         FUSE_ARGS(args);
> >         struct fuse_forget_link *forget;
> > @@ -700,7 +701,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
> >         d_instantiate(entry, inode);
> >         fuse_change_entry_timeout(entry, &outentry);
> >         fuse_dir_changed(dir);
> > -       err = finish_open(file, entry, generic_file_open);
> > +       err = 0;
> > +       if (ff->open_flags & FOPEN_PASSTHROUGH)
> > +               err = fuse_passthrough_setup(fc, ff, &outopen);
> > +       if (!err)
> > +               err = finish_open(file, entry, generic_file_open);
> >         if (err) {
> >                 fi = get_fuse_inode(inode);
> >                 fuse_sync_release(fi, ff, flags);
> > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > index 89d97f6188e0..96a46a5aa892 100644
> > --- a/fs/fuse/file.c
> > +++ b/fs/fuse/file.c
> > @@ -132,6 +132,7 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
> >         struct fuse_conn *fc = fm->fc;
> >         struct fuse_file *ff;
> >         int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
> > +       int err;
> >
> >         ff = fuse_file_alloc(fm);
> >         if (!ff)
> > @@ -142,16 +143,17 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
> >         ff->open_flags = FOPEN_KEEP_CACHE | (isdir ? FOPEN_CACHE_DIR : 0);
> >         if (isdir ? !fc->no_opendir : !fc->no_open) {
> >                 struct fuse_open_out outarg;
> > -               int err;
> >
> >                 err = fuse_send_open(fm, nodeid, open_flags, opcode, &outarg);
> >                 if (!err) {
> >                         ff->fh = outarg.fh;
> >                         ff->open_flags = outarg.open_flags;
> > -
> > +                       if (ff->open_flags & FOPEN_PASSTHROUGH)
> > +                               err = fuse_passthrough_setup(fc, ff, &outarg);
> > +                       if (err)
> > +                               goto out_free_ff;
> >                 } else if (err != -ENOSYS) {
> > -                       fuse_file_free(ff);
> > -                       return ERR_PTR(err);
> > +                       goto out_free_ff;
> >                 } else {
> >                         if (isdir)
> >                                 fc->no_opendir = 1;
> > @@ -166,6 +168,10 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
> >         ff->nodeid = nodeid;
> >
> >         return ff;
> > +
> > +out_free_ff:
> > +       fuse_file_free(ff);
> > +       return ERR_PTR(err);
> >  }
> >
> >  int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
> > @@ -279,6 +285,9 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
> >         struct fuse_conn *fc = ff->fm->fc;
> >         struct fuse_release_args *ra = ff->release_args;
> >
> > +       fuse_passthrough_put(ff->passthrough);
> > +       ff->passthrough = NULL;
> > +
> >         /* Inode is NULL on error path of fuse_create_open() */
> >         if (likely(fi)) {
> >                 spin_lock(&fi->lock);
> > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> > index 9b7fc7d3c7f1..f52604534ff6 100644
> > --- a/fs/fuse/fuse_i.h
> > +++ b/fs/fuse/fuse_i.h
> > @@ -173,6 +173,16 @@ struct fuse_conn;
> >  struct fuse_mount;
> >  struct fuse_release_args;
> >
> > +/**
> > + * Reference to backing file for read/write operations in passthrough mode.
> > + */
> > +struct fuse_passthrough {
> > +       struct file *filp;
> > +
> > +       /** refcount */
> > +       refcount_t count;
> > +};
> > +
> >  /** FUSE specific file data */
> >  struct fuse_file {
> >         /** Fuse connection for this file */
> > @@ -218,6 +228,9 @@ struct fuse_file {
> >
> >         } readdir;
> >
> > +       /** Container for data related to the passthrough functionality */
> > +       struct fuse_passthrough *passthrough;
> > +
> >         /** RB node to be linked on fuse_conn->polled_files */
> >         struct rb_node polled_node;
> >
> > @@ -792,6 +805,9 @@ struct fuse_conn {
> >         /* Is tmpfile not implemented by fs? */
> >         unsigned int no_tmpfile:1;
> >
> > +       /** Passthrough mode for read/write IO */
> > +       unsigned int passthrough:1;
> > +
> >         /** The number of requests waiting for completion */
> >         atomic_t num_waiting;
> >
> > @@ -841,6 +857,9 @@ struct fuse_conn {
> >
> >         /* New writepages go into this bucket */
> >         struct fuse_sync_bucket __rcu *curr_bucket;
> > +
> > +       /** IDR for passthrough files */
> > +       struct idr passthrough_files_map;
> >  };
> >
> >  /*
> > @@ -1324,4 +1343,12 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
> >  void fuse_file_release(struct inode *inode, struct fuse_file *ff,
> >                        unsigned int open_flags, fl_owner_t id, bool isdir);
> >
> > +/* passthrough.c */
> > +int fuse_passthrough_open(struct fuse_conn *fc, int backing_fd);
> > +int fuse_passthrough_close(struct fuse_conn *fc, int passthrough_fh);
> > +int fuse_passthrough_setup(struct fuse_conn *fc, struct fuse_file *ff,
> > +                          struct fuse_open_out *openarg);
> > +void fuse_passthrough_put(struct fuse_passthrough *passthrough);
> > +void fuse_passthrough_free(struct fuse_passthrough *passthrough);
> > +
> >  #endif /* _FS_FUSE_I_H */
> > diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> > index d66070af145d..271586fac008 100644
> > --- a/fs/fuse/inode.c
> > +++ b/fs/fuse/inode.c
> > @@ -840,6 +840,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
> >         INIT_LIST_HEAD(&fc->bg_queue);
> >         INIT_LIST_HEAD(&fc->entry);
> >         INIT_LIST_HEAD(&fc->devices);
> > +       idr_init(&fc->passthrough_files_map);
> >         atomic_set(&fc->num_waiting, 0);
> >         fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
> >         fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
> > @@ -1209,6 +1210,12 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
> >                                 fc->init_security = 1;
> >                         if (flags & FUSE_CREATE_SUPP_GROUP)
> >                                 fc->create_supp_group = 1;
> > +                       if (flags & FUSE_PASSTHROUGH) {
> > +                               fc->passthrough = 1;
> > +                               /* Prevent further stacking */
> > +                               fm->sb->s_stack_depth =
> > +                                       FILESYSTEM_MAX_STACK_DEPTH;
> > +                       }
>
> Seems too restrictive.  We could specify the max stacking depth in the
> protocol and verify that when registering the passthrough file.
>
> I.e. fuse_sb->s_stack_depth of
>
> 0 -> passthrough disabled
> 1 -> backing_sb->s_stack_depth == 0
> 2 -> backing_sb->stack_depth <= 1
> ...
>

Ok. Let's see.
What do we stand to gain from the ability to setup nax stacking depth?

We could use it to setup an overlayfs with lower FUSE that allows passthrough
fds to a non-stacked backing fs and we could use it to setup FUSE that allows
passthrough fds to overlayfs.

I pity the FUSE userspace developers that will need to understand this
setup parameter...
So ignoring the possibility of  FILESYSTEM_MAX_STACK_DEPTH changing in
the future, maybe better describe this with two capability flags
instead of an int?

FUSE_PASSTHROUGH_NESTED ???

Would NESTED mean max depth 1 or 2 through? ;-)

> >                 } else {
> >                         ra_pages = fc->max_read / PAGE_SIZE;
> >                         fc->no_lock = 1;
> > @@ -1254,7 +1261,8 @@ void fuse_send_init(struct fuse_mount *fm)
> >                 FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
> >                 FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
> >                 FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
> > -               FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP;
> > +               FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
> > +               FUSE_PASSTHROUGH;
> >  #ifdef CONFIG_FUSE_DAX
> >         if (fm->fc->dax)
> >                 flags |= FUSE_MAP_ALIGNMENT;
> > @@ -1287,9 +1295,20 @@ void fuse_send_init(struct fuse_mount *fm)
> >  }
> >  EXPORT_SYMBOL_GPL(fuse_send_init);
> >
> > +static int fuse_passthrough_id_free(int id, void *p, void *data)
> > +{
> > +       struct fuse_passthrough *passthrough = p;
> > +
> > +       WARN_ON_ONCE(refcount_read(&passthrough->count) != 1);
> > +       fuse_passthrough_free(passthrough);
> > +       return 0;
> > +}
> > +
> >  void fuse_free_conn(struct fuse_conn *fc)
> >  {
> >         WARN_ON(!list_empty(&fc->devices));
> > +       idr_for_each(&fc->passthrough_files_map, fuse_passthrough_id_free, NULL);
> > +       idr_destroy(&fc->passthrough_files_map);
> >         kfree_rcu(fc, rcu);
> >  }
> >  EXPORT_SYMBOL_GPL(fuse_free_conn);
> > diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
> > new file mode 100644
> > index 000000000000..fc723e004de9
> > --- /dev/null
> > +++ b/fs/fuse/passthrough.c
> > @@ -0,0 +1,40 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +

Alessio et. al.
I noticed that this file does not have any copyright in your original patch.

Do you mind if I retroactively add:

/*
 * FUSE passthrough support.
 *
 * Copyright (c) 2021 Google LLC.
 * Copyright (c) 2023 CTERA Networks.
 */

Thanks,
Amir.




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux