From: Dharmendra Singh <dsingh@xxxxxxx> This adds full atomic open support, to avoid lookup before open/create. If the implementation (fuse server/daemon) does not support atomic open it falls back to non-atomic open. Co-developed-by: Bernd Schubert <bschubert@xxxxxxx> Signed-off-by: Dharmendra Singh <dsingh@xxxxxxx> Signed-off-by: Horst Birthelmer <hbirthelmer@xxxxxxx> Signed-off-by: Bernd Schubert <bschubert@xxxxxxx> Cc: Miklos Szeredi <miklos@xxxxxxxxxx> Cc: Dharmendra Singh <dsingh@xxxxxxx> Cc: linux-fsdevel@xxxxxxxxxxxxxxx --- fs/fuse/dir.c | 222 +++++++++++++++++++++++++++++++++++++- fs/fuse/fuse_i.h | 3 + include/uapi/linux/fuse.h | 3 + 3 files changed, 227 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 6ffc573de470..c02b63fe91ca 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -724,7 +724,7 @@ static int _fuse_create_open(struct inode *dir, struct dentry *entry, static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); -static int fuse_atomic_open(struct inode *dir, struct dentry *entry, +static int fuse_create_open(struct inode *dir, struct dentry *entry, struct file *file, unsigned flags, umode_t mode) { @@ -770,6 +770,226 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, return finish_no_open(file, res); } +static int _fuse_atomic_open(struct inode *dir, struct dentry *entry, + struct file *file, unsigned flags, + umode_t mode) +{ + + int err; + struct inode *inode; + struct fuse_mount *fm = get_fuse_mount(dir); + struct fuse_conn *fc = fm->fc; + FUSE_ARGS(args); + struct fuse_forget_link *forget; + struct fuse_create_in inarg; + struct fuse_open_out outopen; + struct fuse_entry_out outentry; + struct fuse_inode *fi; + struct fuse_file *ff; + struct dentry *switched_entry = NULL, *alias = NULL; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + + /* Expect a negative dentry */ + if (unlikely(d_inode(entry))) + goto fallback; + + /* Userspace expects S_IFREG in create mode */ + if ((flags & O_CREAT) && (mode & S_IFMT) != S_IFREG) + goto fallback; + + forget = fuse_alloc_forget(); + err = -ENOMEM; + if (!forget) + goto out_err; + + err = -ENOMEM; + ff = fuse_file_alloc(fm); + if (!ff) + goto out_put_forget_req; + + if (!fc->dont_mask) + mode &= ~current_umask(); + + flags &= ~O_NOCTTY; + memset(&inarg, 0, sizeof(inarg)); + memset(&outentry, 0, sizeof(outentry)); + inarg.flags = flags; + inarg.mode = mode; + inarg.umask = current_umask(); + + if (fc->handle_killpriv_v2 && (flags & O_TRUNC) && + !(flags & O_EXCL) && !capable(CAP_FSETID)) { + inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID; + } + + args.opcode = FUSE_OPEN_ATOMIC; + args.nodeid = get_node_id(dir); + args.in_numargs = 2; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.in_args[1].size = entry->d_name.len + 1; + args.in_args[1].value = entry->d_name.name; + args.out_numargs = 2; + args.out_args[0].size = sizeof(outentry); + args.out_args[0].value = &outentry; + args.out_args[1].size = sizeof(outopen); + args.out_args[1].value = &outopen; + + if (flags & O_CREAT) { + err = get_create_ext(&args, dir, entry, mode); + if (err) + goto out_free_ff; + } + + err = fuse_simple_request(fm, &args); + free_ext_value(&args); + if (err == -ENOSYS) { + fc->no_open_atomic = 1; + fuse_file_free(ff); + kfree(forget); + goto fallback; + } + + if (!err && !outentry.nodeid) + err = -ENOENT; + + if (err) + goto out_free_ff; + + err = -EIO; + if (invalid_nodeid(outentry.nodeid) || fuse_invalid_attr(&outentry.attr)) + goto out_free_ff; + + ff->fh = outopen.fh; + ff->nodeid = outentry.nodeid; + ff->open_flags = outopen.open_flags; + inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, + &outentry.attr, entry_attr_timeout(&outentry), 0); + if (!inode) { + flags &= ~(O_CREAT | O_EXCL | O_TRUNC); + fuse_sync_release(NULL, ff, flags); + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); + err = -ENOMEM; + goto out_err; + } + + /* prevent racing/parallel lookup on a negative hashed */ + if (!(flags & O_CREAT) && !d_in_lookup(entry)) { + d_drop(entry); + switched_entry = d_alloc_parallel(entry->d_parent, + &entry->d_name, &wq); + if (IS_ERR(switched_entry)) { + err = PTR_ERR(switched_entry); + goto out_free_ff; + } + + if (unlikely(!d_in_lookup(switched_entry))) { + /* fall back */ + dput(switched_entry); + switched_entry = NULL; + goto free_and_fallback; + } + + entry = switched_entry; + } + + /* modified version of _nfs4_open_and_get_state - nfs does not open + * dirs, fuse doe + * nfs has additional d_really_is_negative condition, which does not + * make sense as long as only negative dentries come into this function, + * see BUG_ON above and missing revalidate patch - but needed if + * we are going to handle revalidate + */ + if (d_really_is_negative(entry)) { + d_drop(entry); + alias = d_exact_alias(entry, inode); + if (!alias) { + alias = d_splice_alias(inode, entry); + if (IS_ERR(alias)) { + /* + * Close the file in user space, but do not unlink it, + * if it was created - with network file systems other + * clients might have already accessed it. + */ + fi = get_fuse_inode(inode); + fuse_sync_release(fi, ff, flags); + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); + err = PTR_ERR(alias); + goto out_err; + } + } + + if (alias) + entry = alias; + } + + fuse_change_entry_timeout(entry, &outentry); + + /* File was indeed created */ + if (outopen.open_flags & FOPEN_FILE_CREATED) { + if (!(flags & O_CREAT)) { + pr_debug("Server side bug, FOPEN_FILE_CREATED set " + "without O_CREAT, ignoring."); + } else { + /* This should be always set when the file is created */ + fuse_dir_changed(dir); + file->f_mode |= FMODE_CREATED; + } + } + + if (S_ISDIR(mode)) + ff->open_flags &= ~FOPEN_DIRECT_IO; + err = finish_open(file, entry, generic_file_open); + if (err) { + fi = get_fuse_inode(inode); + fuse_sync_release(fi, ff, flags); + } else { + file->private_data = ff; + fuse_finish_open(inode, file); + } + + kfree(forget); + + if (switched_entry) { + d_lookup_done(switched_entry); + dput(switched_entry); + } + + dput(alias); + + return err; + +out_free_ff: + fuse_file_free(ff); +out_put_forget_req: + kfree(forget); +out_err: + if (switched_entry) { + d_lookup_done(switched_entry); + dput(switched_entry); + } + + return err; + +free_and_fallback: + fuse_file_free(ff); + kfree(forget); +fallback: + return fuse_create_open(dir, entry, file, flags, mode); +} + +static int fuse_atomic_open(struct inode *dir, struct dentry *entry, + struct file *file, unsigned flags, + umode_t mode) +{ + struct fuse_conn *fc = get_fuse_conn(dir); + + if (fc->no_open_atomic) + return fuse_create_open(dir, entry, file, flags, mode); + else + return _fuse_atomic_open(dir, entry, file, flags, mode); +} + /* * Code shared between mknod, mkdir, symlink and link */ diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 9b7fc7d3c7f1..4e2ebcc28912 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -672,6 +672,9 @@ struct fuse_conn { /** Is open/release not implemented by fs? */ unsigned no_open:1; + /** Is open atomic not impelmented by fs? */ + unsigned no_open_atomic:1; + /** Is opendir/releasedir not implemented by fs? */ unsigned no_opendir:1; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 1b9d0dfae72d..ee36263c0f3a 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -314,6 +314,7 @@ struct fuse_file_lock { * FOPEN_STREAM: the file is stream-like (no file position at all) * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode + * FOPEN_FILE_CREATED: the file was indeed created */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) @@ -322,6 +323,7 @@ struct fuse_file_lock { #define FOPEN_STREAM (1 << 4) #define FOPEN_NOFLUSH (1 << 5) #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) +#define FOPEN_FILE_CREATED (1 << 7) /** * INIT request/reply flags @@ -572,6 +574,7 @@ enum fuse_opcode { FUSE_REMOVEMAPPING = 49, FUSE_SYNCFS = 50, FUSE_TMPFILE = 51, + FUSE_OPEN_ATOMIC = 52, /* CUSE specific operations */ CUSE_INIT = 4096, -- 2.34.1