On Wed, Aug 31, 2022 at 02:30:40PM -0700, Yu-li Lin wrote: > Thanks for the reference. IIUC, the consensus is to make it atomic, > although there's no agreement on how it should be done. Does that mean > we should hold off on > this patch until atomic temp files are figured out higher in the stack > or do you have thoughts on how the fuse uapi should look like prior to > the vfs/refactoring decision? Here's a patch refactoring the tmpfile kapi to return an open file instead of a dentry. Comments? Thanks, Miklos --- fs/bad_inode.c | 2 fs/btrfs/inode.c | 12 +++-- fs/cachefiles/namei.c | 3 - fs/ext2/namei.c | 6 +- fs/ext4/namei.c | 15 ++++-- fs/f2fs/namei.c | 9 +++- fs/hugetlbfs/inode.c | 9 +++- fs/minix/namei.c | 9 ++-- fs/namei.c | 101 ++++++++++++++++++++++++++--------------------- fs/open.c | 7 +++ fs/overlayfs/overlayfs.h | 3 - fs/ramfs/inode.c | 6 +- fs/ubifs/dir.c | 5 +- fs/udf/namei.c | 6 +- fs/xfs/xfs_iops.c | 9 +++- include/linux/fs.h | 6 +- mm/shmem.c | 12 +++-- 17 files changed, 138 insertions(+), 82 deletions(-) --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -147,7 +147,7 @@ static int bad_inode_atomic_open(struct } static int bad_inode_tmpfile(struct user_namespace *mnt_userns, - struct inode *inode, struct dentry *dentry, + struct inode *inode, struct file *file, umode_t mode) { return -EIO; --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10169,7 +10169,7 @@ static int btrfs_permission(struct user_ } static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, - struct dentry *dentry, umode_t mode) + struct file *file, umode_t mode) { struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); struct btrfs_trans_handle *trans; @@ -10177,7 +10177,7 @@ static int btrfs_tmpfile(struct user_nam struct inode *inode; struct btrfs_new_inode_args new_inode_args = { .dir = dir, - .dentry = dentry, + .dentry = file->f_path.dentry, .orphan = true, }; unsigned int trans_num_items; @@ -10214,7 +10214,7 @@ static int btrfs_tmpfile(struct user_nam set_nlink(inode, 1); if (!ret) { - d_tmpfile(dentry, inode); + d_tmpfile(file->f_path.dentry, inode); unlock_new_inode(inode); mark_inode_dirty(inode); } @@ -10224,9 +10224,11 @@ static int btrfs_tmpfile(struct user_nam out_new_inode_args: btrfs_new_inode_args_destroy(&new_inode_args); out_inode: - if (ret) + if (ret) { iput(inode); - return ret; + return ret; + } + return finish_tmpfile(file); } void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end) --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -459,9 +459,10 @@ struct file *cachefiles_create_tmpfile(s cachefiles_begin_secure(cache, &saved_cred); path.mnt = cache->mnt; + path.dentry = fan; ret = cachefiles_inject_write_error(); if (ret == 0) - path.dentry = vfs_tmpfile(&init_user_ns, fan, S_IFREG, O_RDWR); + path.dentry = vfs_tmpfile(&init_user_ns, &path, S_IFREG, O_RDWR); else path.dentry = ERR_PTR(ret); if (IS_ERR(path.dentry)) { --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -120,7 +120,7 @@ static int ext2_create (struct user_name } static int ext2_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, - struct dentry *dentry, umode_t mode) + struct file *file, umode_t mode) { struct inode *inode = ext2_new_inode(dir, mode, NULL); if (IS_ERR(inode)) @@ -128,9 +128,9 @@ static int ext2_tmpfile(struct user_name ext2_set_file_ops(inode); mark_inode_dirty(inode); - d_tmpfile(dentry, inode); + d_tmpfile(file->f_path.dentry, inode); unlock_new_inode(inode); - return 0; + return finish_tmpfile(file); } static int ext2_mknod (struct user_namespace * mnt_userns, struct inode * dir, --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2849,7 +2849,7 @@ static int ext4_mknod(struct user_namesp } static int ext4_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, - struct dentry *dentry, umode_t mode) + struct file *file, umode_t mode) { handle_t *handle; struct inode *inode; @@ -2857,7 +2857,7 @@ static int ext4_tmpfile(struct user_name err = dquot_initialize(dir); if (err) - return err; + goto out; retry: inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, @@ -2871,7 +2871,7 @@ static int ext4_tmpfile(struct user_name inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); - d_tmpfile(dentry, inode); + d_tmpfile(file->f_path.dentry, inode); err = ext4_orphan_add(handle, inode); if (err) goto err_unlock_inode; @@ -2882,11 +2882,16 @@ static int ext4_tmpfile(struct user_name ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; - return err; +out: + if (err) + return err; + + return finish_tmpfile(file); + err_unlock_inode: ext4_journal_stop(handle); unlock_new_inode(inode); - return err; + goto out; } struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -915,16 +915,21 @@ static int __f2fs_tmpfile(struct user_na } static int f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, - struct dentry *dentry, umode_t mode) + struct file *file, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; - return __f2fs_tmpfile(mnt_userns, dir, dentry, mode, false, NULL); + err = __f2fs_tmpfile(mnt_userns, dir, file->f_path.dentry, mode, false, NULL); + if (err) + return err; + + return finish_tmpfile(file); } static int f2fs_create_whiteout(struct user_namespace *mnt_userns, --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -932,10 +932,15 @@ static int hugetlbfs_create(struct user_ } static int hugetlbfs_tmpfile(struct user_namespace *mnt_userns, - struct inode *dir, struct dentry *dentry, + struct inode *dir, struct file *file, umode_t mode) { - return do_hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0, true); + int err = do_hugetlbfs_mknod(dir, file->f_path.dentry, mode | S_IFREG, 0, true); + + if (err) + return err; + + return finish_tmpfile(file); } static int hugetlbfs_symlink(struct user_namespace *mnt_userns, --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -53,16 +53,19 @@ static int minix_mknod(struct user_names } static int minix_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, - struct dentry *dentry, umode_t mode) + struct file *file, umode_t mode) { int error; struct inode *inode = minix_new_inode(dir, mode, &error); if (inode) { minix_set_inode(inode, 0); mark_inode_dirty(inode); - d_tmpfile(dentry, inode); + d_tmpfile(file->f_path.dentry, inode); } - return error; + if (error) + return error; + + return finish_tmpfile(file); } static int minix_create(struct user_namespace *mnt_userns, struct inode *dir, --- a/fs/namei.c +++ b/fs/namei.c @@ -3568,59 +3568,78 @@ static int do_open(struct nameidata *nd, return error; } -/** - * vfs_tmpfile - create tmpfile - * @mnt_userns: user namespace of the mount the inode was found from - * @dentry: pointer to dentry of the base directory - * @mode: mode of the new tmpfile - * @open_flag: flags - * - * Create a temporary file. - * - * If the inode has been found through an idmapped mount the user namespace of - * the vfsmount must be passed through @mnt_userns. This function will then take - * care to map the inode according to @mnt_userns before checking permissions. - * On non-idmapped mounts or if permission checking is to be performed on the - * raw inode simply passs init_user_ns. - */ -struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns, - struct dentry *dentry, umode_t mode, int open_flag) +static int vfs_tmpfile_new(struct user_namespace *mnt_userns, + const struct path *parent_path, + struct file *file, umode_t mode) { - struct dentry *child = NULL; - struct inode *dir = dentry->d_inode; - struct inode *inode; + struct inode *inode, *dir = d_inode(parent_path->dentry); + struct dentry *child; int error; /* we want directory to be writable */ error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC); if (error) - goto out_err; + goto out; error = -EOPNOTSUPP; if (!dir->i_op->tmpfile) - goto out_err; + goto out; error = -ENOMEM; - child = d_alloc(dentry, &slash_name); + child = d_alloc(parent_path->dentry, &slash_name); if (unlikely(!child)) - goto out_err; + goto out; + file->f_path.mnt = parent_path->mnt; + file->f_path.dentry = child; mode = vfs_prepare_mode(mnt_userns, dir, mode, mode, mode); - error = dir->i_op->tmpfile(mnt_userns, dir, child, mode); + error = dir->i_op->tmpfile(mnt_userns, dir, file, mode); if (error) - goto out_err; + goto out_dput; error = -ENOENT; inode = child->d_inode; if (unlikely(!inode)) - goto out_err; - if (!(open_flag & O_EXCL)) { + goto out_dput; + if (!(file->f_flags & O_EXCL)) { spin_lock(&inode->i_lock); inode->i_state |= I_LINKABLE; spin_unlock(&inode->i_lock); } ima_post_create_tmpfile(mnt_userns, inode); - return child; - -out_err: + error = 0; +out_dput: dput(child); - return ERR_PTR(error); +out: + return error; +} + +/** + * vfs_tmpfile - create tmpfile + * @mnt_userns: user namespace of the mount the inode was found from + * @dentry: pointer to dentry of the base directory + * @mode: mode of the new tmpfile + * @open_flag: flags + * + * Create a temporary file. + * + * If the inode has been found through an idmapped mount the user namespace of + * the vfsmount must be passed through @mnt_userns. This function will then take + * care to map the inode according to @mnt_userns before checking permissions. + * On non-idmapped mounts or if permission checking is to be performed on the + * raw inode simply passs init_user_ns. + */ +struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns, + const struct path *path, umode_t mode, int open_flag) +{ + struct dentry *child; + struct file *file; + int error; + + file = alloc_empty_file(open_flag, current_cred()); + child = ERR_CAST(file); + if (!IS_ERR(file)) { + error = vfs_tmpfile_new(mnt_userns, path, file, mode); + child = error ? ERR_PTR(error) : dget(file->f_path.dentry); + fput(file); + } + return child; } EXPORT_SYMBOL(vfs_tmpfile); @@ -3629,26 +3648,22 @@ static int do_tmpfile(struct nameidata * struct file *file) { struct user_namespace *mnt_userns; - struct dentry *child; struct path path; - int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path); + int error; + + error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path); if (unlikely(error)) return error; error = mnt_want_write(path.mnt); if (unlikely(error)) goto out; mnt_userns = mnt_user_ns(path.mnt); - child = vfs_tmpfile(mnt_userns, path.dentry, op->mode, op->open_flag); - error = PTR_ERR(child); - if (IS_ERR(child)) + error = vfs_tmpfile_new(mnt_userns, &path, file, op->mode); + if (error) goto out2; - dput(path.dentry); - path.dentry = child; - audit_inode(nd->name, child, 0); + audit_inode(nd->name, file->f_path.dentry, 0); /* Don't check for other permissions, the inode was just created */ - error = may_open(mnt_userns, &path, 0, op->open_flag); - if (!error) - error = vfs_open(&path, file); + error = may_open(mnt_userns, &file->f_path, 0, op->open_flag); out2: mnt_drop_write(path.mnt); out: --- a/fs/open.c +++ b/fs/open.c @@ -975,6 +975,13 @@ int finish_open(struct file *file, struc } EXPORT_SYMBOL(finish_open); +int finish_tmpfile(struct file *file) +{ + BUG_ON(file->f_mode & FMODE_OPENED); + return do_dentry_open(file, d_inode(file->f_path.dentry), NULL); +} + + /** * finish_no_open - finish ->atomic_open() without opening the file * --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -313,7 +313,8 @@ static inline int ovl_do_whiteout(struct static inline struct dentry *ovl_do_tmpfile(struct ovl_fs *ofs, struct dentry *dentry, umode_t mode) { - struct dentry *ret = vfs_tmpfile(ovl_upper_mnt_userns(ofs), dentry, mode, 0); + struct path path = { .dentry = dentry, .mnt = ovl_upper_mnt(ofs) }; + struct dentry *ret = vfs_tmpfile(ovl_upper_mnt_userns(ofs), &path, mode, 0); int err = PTR_ERR_OR_ZERO(ret); pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err); --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -146,15 +146,15 @@ static int ramfs_symlink(struct user_nam } static int ramfs_tmpfile(struct user_namespace *mnt_userns, - struct inode *dir, struct dentry *dentry, umode_t mode) + struct inode *dir, struct file *file, umode_t mode) { struct inode *inode; inode = ramfs_get_inode(dir->i_sb, dir, mode, 0); if (!inode) return -ENOSPC; - d_tmpfile(dentry, inode); - return 0; + d_tmpfile(file->f_path.dentry, inode); + return finish_tmpfile(file); } static const struct inode_operations ramfs_dir_inode_operations = { --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -424,8 +424,9 @@ static void unlock_2_inodes(struct inode } static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, - struct dentry *dentry, umode_t mode) + struct file *file, umode_t mode) { + struct dentry *dentry = file->f_path.dentry; struct inode *inode; struct ubifs_info *c = dir->i_sb->s_fs_info; struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, @@ -489,7 +490,7 @@ static int ubifs_tmpfile(struct user_nam ubifs_release_budget(c, &req); - return 0; + return finish_tmpfile(file); out_cancel: unlock_2_inodes(dir, inode); --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -626,7 +626,7 @@ static int udf_create(struct user_namesp } static int udf_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, - struct dentry *dentry, umode_t mode) + struct file *file, umode_t mode) { struct inode *inode = udf_new_inode(dir, mode); @@ -640,9 +640,9 @@ static int udf_tmpfile(struct user_names inode->i_op = &udf_file_inode_operations; inode->i_fop = &udf_file_operations; mark_inode_dirty(inode); - d_tmpfile(dentry, inode); + d_tmpfile(file->f_path.dentry, inode); unlock_new_inode(inode); - return 0; + return finish_tmpfile(file); } static int udf_mknod(struct user_namespace *mnt_userns, struct inode *dir, --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1080,10 +1080,15 @@ STATIC int xfs_vn_tmpfile( struct user_namespace *mnt_userns, struct inode *dir, - struct dentry *dentry, + struct file *file, umode_t mode) { - return xfs_generic_create(mnt_userns, dir, dentry, mode, 0, true); + int err = xfs_generic_create(mnt_userns, dir, file->f_path.dentry, mode, 0, true); + + if (err) + return err; + + return finish_tmpfile(file); } static const struct inode_operations xfs_inode_operations = { --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2005,7 +2005,8 @@ static inline int vfs_whiteout(struct us } struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns, - struct dentry *dentry, umode_t mode, int open_flag); + const struct path *path, + umode_t mode, int open_flag); int vfs_mkobj(struct dentry *, umode_t, int (*f)(struct dentry *, umode_t, void *), @@ -2167,7 +2168,7 @@ struct inode_operations { struct file *, unsigned open_flag, umode_t create_mode); int (*tmpfile) (struct user_namespace *, struct inode *, - struct dentry *, umode_t); + struct file *, umode_t); int (*set_acl)(struct user_namespace *, struct inode *, struct posix_acl *, int); int (*fileattr_set)(struct user_namespace *mnt_userns, @@ -2777,6 +2778,7 @@ extern void putname(struct filename *nam extern int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *)); +extern int finish_tmpfile(struct file *file); extern int finish_no_open(struct file *file, struct dentry *dentry); /* fs/dcache.c */ --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2912,7 +2912,7 @@ shmem_mknod(struct user_namespace *mnt_u static int shmem_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, - struct dentry *dentry, umode_t mode) + struct file *file, umode_t mode) { struct inode *inode; int error = -ENOSPC; @@ -2927,12 +2927,16 @@ shmem_tmpfile(struct user_namespace *mnt error = simple_acl_create(dir, inode); if (error) goto out_iput; - d_tmpfile(dentry, inode); + d_tmpfile(file->f_path.dentry, inode); } - return error; +out: + if (error) + return error; + + return finish_tmpfile(file); out_iput: iput(inode); - return error; + goto out; } static int shmem_mkdir(struct user_namespace *mnt_userns, struct inode *dir,