Thank you for addressing the symbolic link problems! Additionally, I found an incompatibility with the no_open feature. When the FUSE server has the no_open feature enabled, the atomic_open FUSE request returns a d_entry with an empty file handler and open option FOPEN_KEEP_CACHE (for files) or FOPEN_CACHE_DIR (for directories). With this situation, if we can set fc->no_open = 1 or fc->no_opendir = 1 after receiving the such FUSE reply, as follows, will make the atomic_open compatible with no_open feature: + if (!inode) { + flags &= ~(O_CREAT | O_EXCL | O_TRUNC); + fuse_sync_release(NULL, ff, flags); + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); + err = -ENOMEM; + goto out_err; + } + + if(ff->fh == 0) { + if(ff->open_flags & FOPEN_KEEP_CACHE) + fc->no_open = 1; + if(ff->open_flags & FOPEN_CACHE_DIR) + fc->no_opendir = 1; + } + + /* prevent racing/parallel lookup on a negative hashed */ On Tue, Oct 24, 2023 at 3:41 AM Bernd Schubert <bschubert@xxxxxxx> wrote: > > From: Dharmendra Singh <dsingh@xxxxxxx> > > This adds full atomic open support, to avoid lookup before open/create. > If the implementation (fuse server/daemon) does not support atomic open > it falls back to non-atomic open. > > Co-developed-by: Bernd Schubert <bschubert@xxxxxxx> > Signed-off-by: Bernd Schubert <bschubert@xxxxxxx> > Signed-off-by: Dharmendra Singh <dsingh@xxxxxxx> > Signed-off-by: Horst Birthelmer <hbirthelmer@xxxxxxx> > Cc: Miklos Szeredi <miklos@xxxxxxxxxx> > Cc: Christian Brauner <brauner@xxxxxxxxxx> > Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx> > Cc: Dharmendra Singh <dsingh@xxxxxxx> > Cc: linux-fsdevel@xxxxxxxxxxxxxxx > --- > fs/fuse/dir.c | 214 +++++++++++++++++++++++++++++++++++++- > fs/fuse/fuse_i.h | 3 + > include/uapi/linux/fuse.h | 3 + > 3 files changed, 219 insertions(+), 1 deletion(-) > > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c > index e1095852601c..61cdb8e5f68e 100644 > --- a/fs/fuse/dir.c > +++ b/fs/fuse/dir.c > @@ -716,7 +716,7 @@ static int _fuse_create_open(struct inode *dir, struct dentry *entry, > > static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *, > umode_t, dev_t); > -static int fuse_atomic_open(struct inode *dir, struct dentry *entry, > +static int fuse_create_open(struct inode *dir, struct dentry *entry, > struct file *file, unsigned flags, > umode_t mode) > { > @@ -763,6 +763,218 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, > return finish_no_open(file, res); > } > > +static int _fuse_atomic_open(struct inode *dir, struct dentry *entry, > + struct file *file, unsigned int flags, > + umode_t mode) > +{ > + int err; > + struct inode *inode; > + FUSE_ARGS(args); > + struct fuse_mount *fm = get_fuse_mount(dir); > + struct fuse_conn *fc = fm->fc; > + struct fuse_forget_link *forget; > + struct fuse_create_in inarg; > + struct fuse_open_out outopen; > + struct fuse_entry_out outentry; > + struct fuse_inode *fi; > + struct fuse_file *ff; > + struct dentry *switched_entry = NULL, *alias = NULL; > + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); > + > + /* Expect a negative dentry */ > + if (unlikely(d_inode(entry))) > + goto fallback; > + > + /* Userspace expects S_IFREG in create mode */ > + if ((flags & O_CREAT) && (mode & S_IFMT) != S_IFREG) > + goto fallback; > + > + forget = fuse_alloc_forget(); > + err = -ENOMEM; > + if (!forget) > + goto out_err; > + > + err = -ENOMEM; > + ff = fuse_file_alloc(fm); > + if (!ff) > + goto out_put_forget_req; > + > + if (!fc->dont_mask) > + mode &= ~current_umask(); > + > + flags &= ~O_NOCTTY; > + memset(&inarg, 0, sizeof(inarg)); > + memset(&outentry, 0, sizeof(outentry)); > + inarg.flags = flags; > + inarg.mode = mode; > + inarg.umask = current_umask(); > + > + if (fc->handle_killpriv_v2 && (flags & O_TRUNC) && > + !(flags & O_EXCL) && !capable(CAP_FSETID)) { > + inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID; > + } > + > + args.opcode = FUSE_OPEN_ATOMIC; > + args.nodeid = get_node_id(dir); > + args.in_numargs = 2; > + args.in_args[0].size = sizeof(inarg); > + args.in_args[0].value = &inarg; > + args.in_args[1].size = entry->d_name.len + 1; > + args.in_args[1].value = entry->d_name.name; > + args.out_numargs = 2; > + args.out_args[0].size = sizeof(outentry); > + args.out_args[0].value = &outentry; > + args.out_args[1].size = sizeof(outopen); > + args.out_args[1].value = &outopen; > + > + if (flags & O_CREAT) { > + err = get_create_ext(&args, dir, entry, mode); > + if (err) > + goto out_free_ff; > + } > + > + err = fuse_simple_request(fm, &args); > + free_ext_value(&args); > + if (err == -ENOSYS || err == -ELOOP) { > + if (unlikely(err == -ENOSYS)) > + fc->no_open_atomic = 1; > + goto free_and_fallback; > + } > + > + if (!err && !outentry.nodeid) > + err = -ENOENT; > + > + if (err) > + goto out_free_ff; > + > + err = -EIO; > + if (invalid_nodeid(outentry.nodeid) || fuse_invalid_attr(&outentry.attr)) > + goto out_free_ff; > + > + ff->fh = outopen.fh; > + ff->nodeid = outentry.nodeid; > + ff->open_flags = outopen.open_flags; > + inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, > + &outentry.attr, ATTR_TIMEOUT(&outentry), 0); > + if (!inode) { > + flags &= ~(O_CREAT | O_EXCL | O_TRUNC); > + fuse_sync_release(NULL, ff, flags); > + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); > + err = -ENOMEM; > + goto out_err; > + } > + > + /* prevent racing/parallel lookup on a negative hashed */ > + if (!(flags & O_CREAT) && !d_in_lookup(entry)) { > + d_drop(entry); > + switched_entry = d_alloc_parallel(entry->d_parent, > + &entry->d_name, &wq); > + if (IS_ERR(switched_entry)) { > + err = PTR_ERR(switched_entry); > + switched_entry = NULL; > + goto out_free_ff; > + } > + > + if (unlikely(!d_in_lookup(switched_entry))) { > + /* fall back */ > + dput(switched_entry); > + switched_entry = NULL; > + goto free_and_fallback; > + } > + > + entry = switched_entry; > + } > + > + if (d_really_is_negative(entry)) { > + d_drop(entry); > + alias = d_exact_alias(entry, inode); > + if (!alias) { > + alias = d_splice_alias(inode, entry); > + if (IS_ERR(alias)) { > + /* > + * Close the file in user space, but do not unlink it, > + * if it was created - with network file systems other > + * clients might have already accessed it. > + */ > + fi = get_fuse_inode(inode); > + fuse_sync_release(fi, ff, flags); > + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); > + err = PTR_ERR(alias); > + goto out_err; > + } > + } > + > + if (alias) > + entry = alias; > + } > + > + fuse_change_entry_timeout(entry, &outentry); > + > + /* File was indeed created */ > + if (outopen.open_flags & FOPEN_FILE_CREATED) { > + if (!(flags & O_CREAT)) { > + pr_debug("Server side bug, FOPEN_FILE_CREATED set " > + "without O_CREAT, ignoring."); > + } else { > + /* This should be always set when the file is created */ > + fuse_dir_changed(dir); > + file->f_mode |= FMODE_CREATED; > + } > + } > + > + if (S_ISDIR(mode)) > + ff->open_flags &= ~FOPEN_DIRECT_IO; > + err = finish_open(file, entry, generic_file_open); > + if (err) { > + fi = get_fuse_inode(inode); > + fuse_sync_release(fi, ff, flags); > + } else { > + file->private_data = ff; > + fuse_finish_open(inode, file); > + } > + > + kfree(forget); > + > + if (switched_entry) { > + d_lookup_done(switched_entry); > + dput(switched_entry); > + } > + > + dput(alias); > + > + return err; > + > +out_free_ff: > + fuse_file_free(ff); > +out_put_forget_req: > + kfree(forget); > +out_err: > + if (switched_entry) { > + d_lookup_done(switched_entry); > + dput(switched_entry); > + } > + > + return err; > + > +free_and_fallback: > + fuse_file_free(ff); > + kfree(forget); > +fallback: > + return fuse_create_open(dir, entry, file, flags, mode); > +} > + > +static int fuse_atomic_open(struct inode *dir, struct dentry *entry, > + struct file *file, unsigned int flags, > + umode_t mode) > +{ > + struct fuse_conn *fc = get_fuse_conn(dir); > + > + if (fc->no_open_atomic) > + return fuse_create_open(dir, entry, file, flags, mode); > + else > + return _fuse_atomic_open(dir, entry, file, flags, mode); > +} > + > /* > * Code shared between mknod, mkdir, symlink and link > */ > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > index bf0b85d0b95c..af69578763ef 100644 > --- a/fs/fuse/fuse_i.h > +++ b/fs/fuse/fuse_i.h > @@ -677,6 +677,9 @@ struct fuse_conn { > /** Is open/release not implemented by fs? */ > unsigned no_open:1; > > + /** Is open atomic not implemented by fs? */ > + unsigned no_open_atomic:1; > + > /** Is opendir/releasedir not implemented by fs? */ > unsigned no_opendir:1; > > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h > index db92a7202b34..1508afbd9446 100644 > --- a/include/uapi/linux/fuse.h > +++ b/include/uapi/linux/fuse.h > @@ -353,6 +353,7 @@ struct fuse_file_lock { > * FOPEN_STREAM: the file is stream-like (no file position at all) > * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) > * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode > + * FOPEN_FILE_CREATED: the file was indeed created > */ > #define FOPEN_DIRECT_IO (1 << 0) > #define FOPEN_KEEP_CACHE (1 << 1) > @@ -361,6 +362,7 @@ struct fuse_file_lock { > #define FOPEN_STREAM (1 << 4) > #define FOPEN_NOFLUSH (1 << 5) > #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) > +#define FOPEN_FILE_CREATED (1 << 7) > > /** > * INIT request/reply flags > @@ -617,6 +619,7 @@ enum fuse_opcode { > FUSE_SYNCFS = 50, > FUSE_TMPFILE = 51, > FUSE_STATX = 52, > + FUSE_OPEN_ATOMIC = 53, > > /* CUSE specific operations */ > CUSE_INIT = 4096, > -- > 2.39.2 > >