On Thu, Apr 25, 2024 at 1:16 PM Miklos Szeredi <mszeredi@xxxxxxxxxx> wrote: > > Combine inode creation with opening a file. > > There are six separate objects that are being set up: the backing inode, > dentry and file and the overlay inode, dentry and file. Cleanup in case of > an error is a bit of a challenge and is difficult to test, so careful > review is needed. Did you try running the xfstests with the t_open_tmpfiles test program? (generic/530 generic/531) Note that those tests also run without O_TMPFILE support, so if you run them you should verify that they do not fall back to unlinked files. There are also a few tests that require O_TMPFILE support: _require_xfs_io_command "-T" (generic/004 generic/389 generic/509) There are some tests in src/vfs/vfstest that run sgid tests with O_TMPFILE if it is supported. I identified generic/696 and generic/697, but only the latter currently runs on overlayfs. > > Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx> > --- > fs/backing-file.c | 23 +++++++ > fs/internal.h | 3 + > fs/namei.c | 6 +- > fs/overlayfs/dir.c | 130 +++++++++++++++++++++++++++++++++++ > fs/overlayfs/file.c | 3 - > fs/overlayfs/overlayfs.h | 3 + > include/linux/backing-file.h | 4 ++ > 7 files changed, 166 insertions(+), 6 deletions(-) > > diff --git a/fs/backing-file.c b/fs/backing-file.c > index 740185198db3..2dc3f7477d1d 100644 > --- a/fs/backing-file.c > +++ b/fs/backing-file.c > @@ -52,6 +52,29 @@ struct file *backing_file_open(const struct path *user_path, int flags, > } > EXPORT_SYMBOL_GPL(backing_file_open); > > +struct file *backing_tmpfile_open(const struct path *user_path, int flags, > + struct mnt_idmap *real_idmap, > + const struct path *real_parentpath, > + umode_t mode, const struct cred *cred) > +{ > + struct file *f; > + int error; > + > + f = alloc_empty_backing_file(flags, cred); > + if (IS_ERR(f)) > + return f; > + > + path_get(user_path); > + *backing_file_user_path(f) = *user_path; > + error = vfs_tmpfile(real_idmap, real_parentpath, f, mode); We do not have a custom idmap in other backing_file helpers don't see why we need real_idmap in this helper. I think that should be: mnt_idmap(real_parentpath.mnt) > + if (error) { > + fput(f); > + f = ERR_PTR(error); > + } > + return f; > +} > +EXPORT_SYMBOL(backing_tmpfile_open); > + > struct backing_aio { > struct kiocb iocb; > refcount_t ref; > diff --git a/fs/internal.h b/fs/internal.h > index 7ca738904e34..ab2225136f60 100644 > --- a/fs/internal.h > +++ b/fs/internal.h > @@ -62,6 +62,9 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode); > int do_symlinkat(struct filename *from, int newdfd, struct filename *to); > int do_linkat(int olddfd, struct filename *old, int newdfd, > struct filename *new, int flags); > +int vfs_tmpfile(struct mnt_idmap *idmap, > + const struct path *parentpath, > + struct file *file, umode_t mode); > > /* > * namespace.c > diff --git a/fs/namei.c b/fs/namei.c > index c5b2a25be7d0..13e50b0a49d2 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -3668,9 +3668,9 @@ static int do_open(struct nameidata *nd, > * On non-idmapped mounts or if permission checking is to be performed on the > * raw inode simply pass @nop_mnt_idmap. > */ > -static int vfs_tmpfile(struct mnt_idmap *idmap, > - const struct path *parentpath, > - struct file *file, umode_t mode) > +int vfs_tmpfile(struct mnt_idmap *idmap, > + const struct path *parentpath, > + struct file *file, umode_t mode) > { > struct dentry *child; > struct inode *dir = d_inode(parentpath->dentry); > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c > index 0f8b4a719237..91ac268986a9 100644 > --- a/fs/overlayfs/dir.c > +++ b/fs/overlayfs/dir.c > @@ -14,6 +14,7 @@ > #include <linux/posix_acl_xattr.h> > #include <linux/atomic.h> > #include <linux/ratelimit.h> > +#include <linux/backing-file.h> > #include "overlayfs.h" > > static unsigned short ovl_redirect_max = 256; > @@ -1290,6 +1291,134 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, > return err; > } > > +static int ovl_create_upper_tmpfile(struct file *file, struct dentry *dentry, > + struct inode *inode, umode_t mode) > +{ > + struct ovl_inode_params oip; > + struct path realparentpath; > + struct file *realfile; > + /* It's okay to set O_NOATIME, since the owner will be current fsuid */ > + int flags = file->f_flags | OVL_OPEN_FLAGS; > + > + ovl_path_upper(dentry->d_parent, &realparentpath); > + > + if (!IS_POSIXACL(d_inode(realparentpath.dentry))) > + mode &= ~current_umask(); > + > + realfile = backing_tmpfile_open(&file->f_path, flags, > + &nop_mnt_idmap, &realparentpath, mode, > + current_cred()); Using &nop_mnt_idmap here is not only unneeded but also looks wrong. > + if (IS_ERR(realfile)) > + return PTR_ERR(realfile); > + > + ovl_dentry_set_upper_alias(dentry); > + ovl_dentry_update_reval(dentry, realfile->f_path.dentry); > + > + /* ovl_get_inode() consumes the .upperdentry reference on success */ > + oip = (struct ovl_inode_params) { > + .upperdentry = dget(realfile->f_path.dentry), > + .newinode = inode, > + }; > + > + inode = ovl_get_inode(dentry->d_sb, &oip); > + if (IS_ERR(inode)) > + goto out_err; > + > + /* d_tmpfile() expects inode to have a positive link count */ > + set_nlink(inode, 1); > + d_tmpfile(file, inode); Any reason not to reuse ovl_instantiate() to avoid duplicating some of the subtlety? for example: + /* ovl_instantiate() consumes the .upperdentry reference on success */ + dget(realfile->f_path.dentry) + err = ovl_instantiate(dentry, inode, realfile->f_path.dentry, 0, 1); + if (err) + goto out_err; [...] static int ovl_instantiate(struct dentry *dentry, struct inode *inode, - struct dentry *newdentry, bool hardlink) + struct dentry *newdentry, bool hardlink, bool tmpfile) { struct ovl_inode_params oip = { .upperdentry = newdentry, @@ -295,6 +295,9 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode, inc_nlink(inode); } + if (tmpfile) + d_mark_tmpfile(dentry); + d_instantiate(dentry, inode); > + file->private_data = realfile; > + return 0; > + > +out_err: > + dput(realfile->f_path.dentry); > + fput(realfile); > + return PTR_ERR(inode); > +} > + > +static int ovl_create_tmpfile(struct file *file, struct dentry *dentry, > + struct inode *inode, umode_t mode) > +{ > + int err; > + const struct cred *old_cred; > + struct cred *override_cred; > + > + err = ovl_copy_up(dentry->d_parent); > + if (err) > + return err; > + > + old_cred = ovl_override_creds(dentry->d_sb); > + > + err = -ENOMEM; > + override_cred = prepare_creds(); > + if (override_cred) { > + override_cred->fsuid = inode->i_uid; > + override_cred->fsgid = inode->i_gid; > + err = security_dentry_create_files_as(dentry, mode, > + &dentry->d_name, old_cred, > + override_cred); > + if (err) { > + put_cred(override_cred); > + goto out_revert_creds; > + } > + put_cred(override_creds(override_cred)); > + put_cred(override_cred); > + > + err = ovl_create_upper_tmpfile(file, dentry, inode, mode); > + } > +out_revert_creds: > + revert_creds(old_cred); > + return err; > +} This also shouts unneeded and subtle code duplication to me: static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, - struct ovl_cattr *attr, bool origin) + struct ovl_cattr *attr, bool origin, + struct file *tmpfile) { int err; const struct cred *old_cred; @@ -602,7 +606,9 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, put_cred(override_cred); } - if (!ovl_dentry_is_whiteout(dentry)) + if (tmpfile) + err = ovl_create_upper_tmpfile(tmpfile, dentry, inode, attr->mode); + else if (!ovl_dentry_is_whiteout(dentry)) err = ovl_create_upper(dentry, inode, attr); else err = ovl_create_over_whiteout(dentry, inode, attr); > + > +static int ovl_dummy_open(struct inode *inode, struct file *file) > +{ > + return 0; > +} > + > +static int ovl_tmpfile(struct mnt_idmap *idmap, struct inode *dir, > + struct file *file, umode_t mode) > +{ > + int err; > + struct dentry *dentry = file->f_path.dentry; > + struct inode *inode; > + > + err = ovl_want_write(dentry); > + if (err) > + return err; > + > + err = -ENOMEM; > + inode = ovl_new_inode(dentry->d_sb, mode, 0); > + if (!inode) > + goto drop_write; > + > + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); > + err = ovl_create_tmpfile(file, dentry, inode, inode->i_mode); > + if (err) > + goto put_inode; > + > + /* > + * Check if the preallocated inode was actually used. Having something > + * else assigned to the dentry shouldn't happen as that would indicate > + * that the backing tmpfile "leaked" out of overlayfs. > + */ > + err = -EIO; > + if (WARN_ON(inode != d_inode(dentry))) > + goto put_realfile; > + > + /* inode reference was transferred to dentry */ > + inode = NULL; > + err = finish_open(file, dentry, ovl_dummy_open); > +put_realfile: > + if (!(file->f_mode & FMODE_OPENED)) > + fput(file->private_data); This cleanup bit is very subtle and hard for me to review. I wonder if there is a way to improve this subtlety? Would it be possible to write this cleanup as: + if (err && file->private_data) + fput(file->private_data); With a comment explaining where file->private_data is set? Overall, I did not find any bugs, but I am hoping that the code could be a bit easier to review and maintain. Thanks, Amir.