From: Dharmendra Singh <dsingh@xxxxxxx> This adds full atomic open support, to avoid lookup before open/create. If the implementation (fuse server/daemon) does not support atomic open it falls back to non-atomic open. Co-developed-by: Bernd Schubert <bschubert@xxxxxxx> Signed-off-by: Bernd Schubert <bschubert@xxxxxxx> Signed-off-by: Dharmendra Singh <dsingh@xxxxxxx> Signed-off-by: Horst Birthelmer <hbirthelmer@xxxxxxx> Cc: Miklos Szeredi <miklos@xxxxxxxxxx> Cc: Christian Brauner <brauner@xxxxxxxxxx> Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx> Cc: Dharmendra Singh <dsingh@xxxxxxx> Cc: linux-fsdevel@xxxxxxxxxxxxxxx --- fs/fuse/dir.c | 214 +++++++++++++++++++++++++++++++++++++- fs/fuse/fuse_i.h | 3 + include/uapi/linux/fuse.h | 3 + 3 files changed, 219 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e1095852601c..61cdb8e5f68e 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -716,7 +716,7 @@ static int _fuse_create_open(struct inode *dir, struct dentry *entry, static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); -static int fuse_atomic_open(struct inode *dir, struct dentry *entry, +static int fuse_create_open(struct inode *dir, struct dentry *entry, struct file *file, unsigned flags, umode_t mode) { @@ -763,6 +763,218 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, return finish_no_open(file, res); } +static int _fuse_atomic_open(struct inode *dir, struct dentry *entry, + struct file *file, unsigned int flags, + umode_t mode) +{ + int err; + struct inode *inode; + FUSE_ARGS(args); + struct fuse_mount *fm = get_fuse_mount(dir); + struct fuse_conn *fc = fm->fc; + struct fuse_forget_link *forget; + struct fuse_create_in inarg; + struct fuse_open_out outopen; + struct fuse_entry_out outentry; + struct fuse_inode *fi; + struct fuse_file *ff; + struct dentry *switched_entry = NULL, *alias = NULL; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + + /* Expect a negative dentry */ + if (unlikely(d_inode(entry))) + goto fallback; + + /* Userspace expects S_IFREG in create mode */ + if ((flags & O_CREAT) && (mode & S_IFMT) != S_IFREG) + goto fallback; + + forget = fuse_alloc_forget(); + err = -ENOMEM; + if (!forget) + goto out_err; + + err = -ENOMEM; + ff = fuse_file_alloc(fm); + if (!ff) + goto out_put_forget_req; + + if (!fc->dont_mask) + mode &= ~current_umask(); + + flags &= ~O_NOCTTY; + memset(&inarg, 0, sizeof(inarg)); + memset(&outentry, 0, sizeof(outentry)); + inarg.flags = flags; + inarg.mode = mode; + inarg.umask = current_umask(); + + if (fc->handle_killpriv_v2 && (flags & O_TRUNC) && + !(flags & O_EXCL) && !capable(CAP_FSETID)) { + inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID; + } + + args.opcode = FUSE_OPEN_ATOMIC; + args.nodeid = get_node_id(dir); + args.in_numargs = 2; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.in_args[1].size = entry->d_name.len + 1; + args.in_args[1].value = entry->d_name.name; + args.out_numargs = 2; + args.out_args[0].size = sizeof(outentry); + args.out_args[0].value = &outentry; + args.out_args[1].size = sizeof(outopen); + args.out_args[1].value = &outopen; + + if (flags & O_CREAT) { + err = get_create_ext(&args, dir, entry, mode); + if (err) + goto out_free_ff; + } + + err = fuse_simple_request(fm, &args); + free_ext_value(&args); + if (err == -ENOSYS || err == -ELOOP) { + if (unlikely(err == -ENOSYS)) + fc->no_open_atomic = 1; + goto free_and_fallback; + } + + if (!err && !outentry.nodeid) + err = -ENOENT; + + if (err) + goto out_free_ff; + + err = -EIO; + if (invalid_nodeid(outentry.nodeid) || fuse_invalid_attr(&outentry.attr)) + goto out_free_ff; + + ff->fh = outopen.fh; + ff->nodeid = outentry.nodeid; + ff->open_flags = outopen.open_flags; + inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, + &outentry.attr, ATTR_TIMEOUT(&outentry), 0); + if (!inode) { + flags &= ~(O_CREAT | O_EXCL | O_TRUNC); + fuse_sync_release(NULL, ff, flags); + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); + err = -ENOMEM; + goto out_err; + } + + /* prevent racing/parallel lookup on a negative hashed */ + if (!(flags & O_CREAT) && !d_in_lookup(entry)) { + d_drop(entry); + switched_entry = d_alloc_parallel(entry->d_parent, + &entry->d_name, &wq); + if (IS_ERR(switched_entry)) { + err = PTR_ERR(switched_entry); + switched_entry = NULL; + goto out_free_ff; + } + + if (unlikely(!d_in_lookup(switched_entry))) { + /* fall back */ + dput(switched_entry); + switched_entry = NULL; + goto free_and_fallback; + } + + entry = switched_entry; + } + + if (d_really_is_negative(entry)) { + d_drop(entry); + alias = d_exact_alias(entry, inode); + if (!alias) { + alias = d_splice_alias(inode, entry); + if (IS_ERR(alias)) { + /* + * Close the file in user space, but do not unlink it, + * if it was created - with network file systems other + * clients might have already accessed it. + */ + fi = get_fuse_inode(inode); + fuse_sync_release(fi, ff, flags); + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); + err = PTR_ERR(alias); + goto out_err; + } + } + + if (alias) + entry = alias; + } + + fuse_change_entry_timeout(entry, &outentry); + + /* File was indeed created */ + if (outopen.open_flags & FOPEN_FILE_CREATED) { + if (!(flags & O_CREAT)) { + pr_debug("Server side bug, FOPEN_FILE_CREATED set " + "without O_CREAT, ignoring."); + } else { + /* This should be always set when the file is created */ + fuse_dir_changed(dir); + file->f_mode |= FMODE_CREATED; + } + } + + if (S_ISDIR(mode)) + ff->open_flags &= ~FOPEN_DIRECT_IO; + err = finish_open(file, entry, generic_file_open); + if (err) { + fi = get_fuse_inode(inode); + fuse_sync_release(fi, ff, flags); + } else { + file->private_data = ff; + fuse_finish_open(inode, file); + } + + kfree(forget); + + if (switched_entry) { + d_lookup_done(switched_entry); + dput(switched_entry); + } + + dput(alias); + + return err; + +out_free_ff: + fuse_file_free(ff); +out_put_forget_req: + kfree(forget); +out_err: + if (switched_entry) { + d_lookup_done(switched_entry); + dput(switched_entry); + } + + return err; + +free_and_fallback: + fuse_file_free(ff); + kfree(forget); +fallback: + return fuse_create_open(dir, entry, file, flags, mode); +} + +static int fuse_atomic_open(struct inode *dir, struct dentry *entry, + struct file *file, unsigned int flags, + umode_t mode) +{ + struct fuse_conn *fc = get_fuse_conn(dir); + + if (fc->no_open_atomic) + return fuse_create_open(dir, entry, file, flags, mode); + else + return _fuse_atomic_open(dir, entry, file, flags, mode); +} + /* * Code shared between mknod, mkdir, symlink and link */ diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index bf0b85d0b95c..af69578763ef 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -677,6 +677,9 @@ struct fuse_conn { /** Is open/release not implemented by fs? */ unsigned no_open:1; + /** Is open atomic not implemented by fs? */ + unsigned no_open_atomic:1; + /** Is opendir/releasedir not implemented by fs? */ unsigned no_opendir:1; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index db92a7202b34..1508afbd9446 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -353,6 +353,7 @@ struct fuse_file_lock { * FOPEN_STREAM: the file is stream-like (no file position at all) * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode + * FOPEN_FILE_CREATED: the file was indeed created */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) @@ -361,6 +362,7 @@ struct fuse_file_lock { #define FOPEN_STREAM (1 << 4) #define FOPEN_NOFLUSH (1 << 5) #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) +#define FOPEN_FILE_CREATED (1 << 7) /** * INIT request/reply flags @@ -617,6 +619,7 @@ enum fuse_opcode { FUSE_SYNCFS = 50, FUSE_TMPFILE = 51, FUSE_STATX = 52, + FUSE_OPEN_ATOMIC = 53, /* CUSE specific operations */ CUSE_INIT = 4096, -- 2.39.2