Re: [PATCH] ovl: implement tmpfile

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Apr 25, 2024 at 1:16 PM Miklos Szeredi <mszeredi@xxxxxxxxxx> wrote:
>
> Combine inode creation with opening a file.
>
> There are six separate objects that are being set up: the backing inode,
> dentry and file and the overlay inode, dentry and file.  Cleanup in case of
> an error is a bit of a challenge and is difficult to test, so careful
> review is needed.

Did you try running the xfstests with the t_open_tmpfiles test program?
(generic/530 generic/531)
Note that those tests also run without O_TMPFILE support, so if you run
them you should verify that they do not fall back to unlinked files.

There are also a few tests that require O_TMPFILE support:
_require_xfs_io_command "-T"
(generic/004 generic/389 generic/509)

There are some tests in src/vfs/vfstest that run sgid tests
with O_TMPFILE if it is supported.
I identified generic/696 and generic/697, but only the latter
currently runs on overlayfs.


>
> Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx>
> ---
>  fs/backing-file.c            |  23 +++++++
>  fs/internal.h                |   3 +
>  fs/namei.c                   |   6 +-
>  fs/overlayfs/dir.c           | 130 +++++++++++++++++++++++++++++++++++
>  fs/overlayfs/file.c          |   3 -
>  fs/overlayfs/overlayfs.h     |   3 +
>  include/linux/backing-file.h |   4 ++
>  7 files changed, 166 insertions(+), 6 deletions(-)
>
> diff --git a/fs/backing-file.c b/fs/backing-file.c
> index 740185198db3..2dc3f7477d1d 100644
> --- a/fs/backing-file.c
> +++ b/fs/backing-file.c
> @@ -52,6 +52,29 @@ struct file *backing_file_open(const struct path *user_path, int flags,
>  }
>  EXPORT_SYMBOL_GPL(backing_file_open);
>
> +struct file *backing_tmpfile_open(const struct path *user_path, int flags,
> +                                 struct mnt_idmap *real_idmap,
> +                                 const struct path *real_parentpath,
> +                                 umode_t mode, const struct cred *cred)
> +{
> +       struct file *f;
> +       int error;
> +
> +       f = alloc_empty_backing_file(flags, cred);
> +       if (IS_ERR(f))
> +               return f;
> +
> +       path_get(user_path);
> +       *backing_file_user_path(f) = *user_path;
> +       error = vfs_tmpfile(real_idmap, real_parentpath, f, mode);

We do not have a custom idmap in other backing_file helpers
don't see why we need real_idmap in this helper.
I think that should be:
mnt_idmap(real_parentpath.mnt)


> +       if (error) {
> +               fput(f);
> +               f = ERR_PTR(error);
> +       }
> +       return f;
> +}
> +EXPORT_SYMBOL(backing_tmpfile_open);
> +
>  struct backing_aio {
>         struct kiocb iocb;
>         refcount_t ref;
> diff --git a/fs/internal.h b/fs/internal.h
> index 7ca738904e34..ab2225136f60 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -62,6 +62,9 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode);
>  int do_symlinkat(struct filename *from, int newdfd, struct filename *to);
>  int do_linkat(int olddfd, struct filename *old, int newdfd,
>                         struct filename *new, int flags);
> +int vfs_tmpfile(struct mnt_idmap *idmap,
> +               const struct path *parentpath,
> +               struct file *file, umode_t mode);
>
>  /*
>   * namespace.c
> diff --git a/fs/namei.c b/fs/namei.c
> index c5b2a25be7d0..13e50b0a49d2 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -3668,9 +3668,9 @@ static int do_open(struct nameidata *nd,
>   * On non-idmapped mounts or if permission checking is to be performed on the
>   * raw inode simply pass @nop_mnt_idmap.
>   */
> -static int vfs_tmpfile(struct mnt_idmap *idmap,
> -                      const struct path *parentpath,
> -                      struct file *file, umode_t mode)
> +int vfs_tmpfile(struct mnt_idmap *idmap,
> +               const struct path *parentpath,
> +               struct file *file, umode_t mode)
>  {
>         struct dentry *child;
>         struct inode *dir = d_inode(parentpath->dentry);
> diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> index 0f8b4a719237..91ac268986a9 100644
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -14,6 +14,7 @@
>  #include <linux/posix_acl_xattr.h>
>  #include <linux/atomic.h>
>  #include <linux/ratelimit.h>
> +#include <linux/backing-file.h>
>  #include "overlayfs.h"
>
>  static unsigned short ovl_redirect_max = 256;
> @@ -1290,6 +1291,134 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
>         return err;
>  }
>
> +static int ovl_create_upper_tmpfile(struct file *file, struct dentry *dentry,
> +                                   struct inode *inode, umode_t mode)
> +{
> +       struct ovl_inode_params oip;
> +       struct path realparentpath;
> +       struct file *realfile;
> +       /* It's okay to set O_NOATIME, since the owner will be current fsuid */
> +       int flags = file->f_flags | OVL_OPEN_FLAGS;
> +
> +       ovl_path_upper(dentry->d_parent, &realparentpath);
> +
> +       if (!IS_POSIXACL(d_inode(realparentpath.dentry)))
> +               mode &= ~current_umask();
> +
> +       realfile = backing_tmpfile_open(&file->f_path, flags,
> +                                       &nop_mnt_idmap, &realparentpath, mode,
> +                                       current_cred());

Using &nop_mnt_idmap here is not only unneeded but also looks wrong.

> +       if (IS_ERR(realfile))
> +               return PTR_ERR(realfile);
> +
> +       ovl_dentry_set_upper_alias(dentry);
> +       ovl_dentry_update_reval(dentry, realfile->f_path.dentry);
> +
> +       /* ovl_get_inode() consumes the .upperdentry reference on success */
> +       oip = (struct ovl_inode_params) {
> +               .upperdentry = dget(realfile->f_path.dentry),
> +               .newinode = inode,
> +       };
> +
> +       inode = ovl_get_inode(dentry->d_sb, &oip);
> +       if (IS_ERR(inode))
> +               goto out_err;
> +
> +       /* d_tmpfile() expects inode to have a positive link count */
> +       set_nlink(inode, 1);
> +       d_tmpfile(file, inode);

Any reason not to reuse ovl_instantiate() to avoid duplicating some
of the subtlety? for example:

+       /* ovl_instantiate() consumes the .upperdentry reference on success */
+       dget(realfile->f_path.dentry)
+       err = ovl_instantiate(dentry, inode, realfile->f_path.dentry, 0, 1);
+       if (err)
+               goto out_err;

[...]

 static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
-                          struct dentry *newdentry, bool hardlink)
+                          struct dentry *newdentry, bool hardlink,
bool tmpfile)
 {
        struct ovl_inode_params oip = {
                .upperdentry = newdentry,
@@ -295,6 +295,9 @@ static int ovl_instantiate(struct dentry *dentry,
struct inode *inode,
                inc_nlink(inode);
        }

+       if (tmpfile)
+               d_mark_tmpfile(dentry);
+
        d_instantiate(dentry, inode);


> +       file->private_data = realfile;
> +       return 0;
> +
> +out_err:
> +       dput(realfile->f_path.dentry);
> +       fput(realfile);
> +       return PTR_ERR(inode);
> +}
> +
> +static int ovl_create_tmpfile(struct file *file, struct dentry *dentry,
> +                             struct inode *inode, umode_t mode)
> +{
> +       int err;
> +       const struct cred *old_cred;
> +       struct cred *override_cred;
> +
> +       err = ovl_copy_up(dentry->d_parent);
> +       if (err)
> +               return err;
> +
> +       old_cred = ovl_override_creds(dentry->d_sb);
> +
> +       err = -ENOMEM;
> +       override_cred = prepare_creds();
> +       if (override_cred) {
> +               override_cred->fsuid = inode->i_uid;
> +               override_cred->fsgid = inode->i_gid;
> +               err = security_dentry_create_files_as(dentry, mode,
> +                                                     &dentry->d_name, old_cred,
> +                                                     override_cred);
> +               if (err) {
> +                       put_cred(override_cred);
> +                       goto out_revert_creds;
> +               }
> +               put_cred(override_creds(override_cred));
> +               put_cred(override_cred);
> +
> +               err = ovl_create_upper_tmpfile(file, dentry, inode, mode);
> +       }
> +out_revert_creds:
> +       revert_creds(old_cred);
> +       return err;
> +}

This also shouts unneeded and subtle code duplication to me:

 static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
-                             struct ovl_cattr *attr, bool origin)
+                             struct ovl_cattr *attr, bool origin,
+                             struct file *tmpfile)
 {
        int err;
        const struct cred *old_cred;
@@ -602,7 +606,9 @@ static int ovl_create_or_link(struct dentry
*dentry, struct inode *inode,
                put_cred(override_cred);
        }

-       if (!ovl_dentry_is_whiteout(dentry))
+       if (tmpfile)
+               err = ovl_create_upper_tmpfile(tmpfile, dentry, inode,
attr->mode);
+       else if (!ovl_dentry_is_whiteout(dentry))
                err = ovl_create_upper(dentry, inode, attr);
        else
                err = ovl_create_over_whiteout(dentry, inode, attr);

> +
> +static int ovl_dummy_open(struct inode *inode, struct file *file)
> +{
> +       return 0;
> +}
> +
> +static int ovl_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
> +                      struct file *file, umode_t mode)
> +{
> +       int err;
> +       struct dentry *dentry = file->f_path.dentry;
> +       struct inode *inode;
> +
> +       err = ovl_want_write(dentry);
> +       if (err)
> +               return err;
> +
> +       err = -ENOMEM;
> +       inode = ovl_new_inode(dentry->d_sb, mode, 0);
> +       if (!inode)
> +               goto drop_write;
> +
> +       inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
> +       err = ovl_create_tmpfile(file, dentry, inode, inode->i_mode);
> +       if (err)
> +               goto put_inode;
> +
> +       /*
> +        * Check if the preallocated inode was actually used.  Having something
> +        * else assigned to the dentry shouldn't happen as that would indicate
> +        * that the backing tmpfile "leaked" out of overlayfs.
> +        */
> +       err = -EIO;
> +       if (WARN_ON(inode != d_inode(dentry)))
> +               goto put_realfile;
> +
> +       /* inode reference was transferred to dentry */
> +       inode = NULL;
> +       err = finish_open(file, dentry, ovl_dummy_open);
> +put_realfile:
> +       if (!(file->f_mode & FMODE_OPENED))
> +               fput(file->private_data);

This cleanup bit is very subtle and hard for me to review.
I wonder if there is a way to improve this subtlety?

Would it be possible to write this cleanup as:
+       if (err && file->private_data)
+               fput(file->private_data);

With a comment explaining where file->private_data is set?

Overall, I did not find any bugs, but I am hoping that the code could be
a bit easier to review and maintain.

Thanks,
Amir.





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux