From: Amir Goldstein <amir73il@xxxxxxxxx>

The fuse inode io mode is determined by the mode of its open files/mmaps
and by parallel dio:

- caching io mode - files open in caching mode or mmap on a direct_io file
- direct io mode - no files open in caching mode and no files mmapped
- parallel dio mode - direct io mode with parallel dio in progress

We use a new FOPEN_CACHE_IO flag to explicitly mark a file that was opened
in caching mode.  direct_io mmap uses the page cache, so the first mmap
marks the file as FOPEN_DIRECT_IO|FOPEN_CACHE_IO (i.e. mixed mode) and the
inode enters caching io mode.  If the server opens the file with flags
FOPEN_DIRECT_IO|FOPEN_CACHE_IO, the inode enters caching io mode already
on open.

This allows executing parallel dio when the inode is not in caching mode,
even if shared mmap is allowed but no mmaps have been performed on the
inode in question.

An open in caching mode and an mmap on a direct_io file now wait for all
in-progress parallel dio writes to complete, so parallel dio writes
together with FUSE_DIRECT_IO_ALLOW_MMAP are enabled by this commit.

Signed-off-by: Bernd Schubert <bschubert@xxxxxxx>
Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx>
---
 fs/fuse/file.c            | 215 ++++++++++++++++++++++++++++++++++++--
 fs/fuse/fuse_i.h          |  79 +++++++++++++-
 include/uapi/linux/fuse.h |   2 +
 3 files changed, 286 insertions(+), 10 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 7d2f4b0eb36a..eb9929ff9f60 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -105,10 +105,177 @@ static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args,
 	kfree(ra);
 }
 
+static bool fuse_file_is_direct_io(struct file *file)
+{
+	struct fuse_file *ff = file->private_data;
+
+	return ff->open_flags & FOPEN_DIRECT_IO || file->f_flags & O_DIRECT;
+}
+
+/*
+ * Wait for cached io to be allowed -
+ * Blocks new parallel dio writes and waits for the in-progress parallel dio
+ * writes to complete.
+ */
+static int fuse_inode_wait_for_cached_io(struct fuse_inode *fi)
+{
+	int err = 0;
+
+	assert_spin_locked(&fi->lock);
+
+	while (!err && !fuse_inode_get_io_cache(fi)) {
+		/*
+		 * Setting the bit advises new direct-io writes
+		 * to use an exclusive lock - without it the wait below
+		 * might never complete.
+		 */
+		set_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
+		spin_unlock(&fi->lock);
+		err = wait_event_killable(fi->direct_io_waitq,
+					  fuse_is_io_cache_allowed(fi));
+		spin_lock(&fi->lock);
+	}
+	/* Clear FUSE_I_CACHE_IO_MODE flag if we failed to enter caching mode */
+	if (err && fi->iocachectr <= 0)
+		clear_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
+
+	return err;
+}
+
+/* Start cached io mode, where parallel dio writes are not allowed */
+static int fuse_file_cached_io_start(struct inode *inode)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	int err;
+
+	spin_lock(&fi->lock);
+	err = fuse_inode_wait_for_cached_io(fi);
+	spin_unlock(&fi->lock);
+	return err;
+}
+
+static void fuse_file_cached_io_end(struct inode *inode)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+
+	spin_lock(&fi->lock);
+	fuse_inode_put_io_cache(fi);
+	spin_unlock(&fi->lock);
+}
+
+/* Start strictly uncached io mode, where cache access is not allowed */
+static int fuse_file_uncached_io_start(struct inode *inode)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	bool ok;
+
+	spin_lock(&fi->lock);
+	ok = fuse_inode_deny_io_cache(fi);
+	spin_unlock(&fi->lock);
+	return ok ? 
0 : -ETXTBSY;
+}
+
+static void fuse_file_uncached_io_end(struct inode *inode)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	bool allow_cached_io;
+
+	spin_lock(&fi->lock);
+	allow_cached_io = fuse_inode_allow_io_cache(fi);
+	spin_unlock(&fi->lock);
+	if (allow_cached_io)
+		wake_up(&fi->direct_io_waitq);
+}
+
+/* Open flags that determine regular file io mode */
+#define FOPEN_IO_MODE_MASK \
+	(FOPEN_DIRECT_IO | FOPEN_CACHE_IO)
+
+/* Request access to submit new io to inode via an open file */
+static int fuse_file_io_open(struct file *file, struct inode *inode)
+{
+	struct fuse_file *ff = file->private_data;
+	int iomode_flags = ff->open_flags & FOPEN_IO_MODE_MASK;
+	int err;
+
+	err = -EBUSY;
+	if (WARN_ON(ff->io_opened))
+		goto fail;
+
+	if (!S_ISREG(inode->i_mode) || FUSE_IS_DAX(inode)) {
+		err = -EINVAL;
+		if (iomode_flags)
+			goto fail;
+		return 0;
+	}
+
+	/* Set explicit FOPEN_CACHE_IO flag for a file opened in caching mode */
+	if (!fuse_file_is_direct_io(file))
+		ff->open_flags |= FOPEN_CACHE_IO;
+
+	/* First caching file open enters caching inode io mode */
+	if (ff->open_flags & FOPEN_CACHE_IO) {
+		err = fuse_file_cached_io_start(inode);
+		if (err)
+			goto fail;
+	}
+
+	ff->io_opened = true;
+	return 0;
+
+fail:
+	pr_debug("failed to open file in requested io mode (open_flags=0x%x, err=%i).\n",
+		 ff->open_flags, err);
+	/*
+	 * The file open mode determines the inode io mode.
+	 * Using an incorrect open mode is a server mistake, which results in
+	 * a user visible failure of open() with EIO error.
+	 */
+	return -EIO;
+}
+
+/* Request access to submit new io to inode via mmap */
+static int fuse_file_io_mmap(struct fuse_file *ff, struct inode *inode)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	int err = 0;
+
+	if (WARN_ON(!ff->io_opened))
+		return -ENODEV;
+
+	spin_lock(&fi->lock);
+	/* First mmap of a direct_io file enters caching inode io mode */
+	if (!(ff->open_flags & FOPEN_CACHE_IO)) {
+		err = fuse_inode_wait_for_cached_io(fi);
+		if (!err)
+			ff->open_flags |= FOPEN_CACHE_IO;
+	}
+	spin_unlock(&fi->lock);
+
+	return err;
+}
+
+/* No more pending io and no new io possible to inode via open/mmapped file */
+static void fuse_file_io_release(struct fuse_file *ff, struct inode *inode)
+{
+	if (!ff->io_opened)
+		return;
+
+	/* Last caching file close exits caching inode io mode */
+	if (ff->open_flags & FOPEN_CACHE_IO)
+		fuse_file_cached_io_end(inode);
+
+	ff->io_opened = false;
+}
+
 static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
 {
 	if (refcount_dec_and_test(&ff->count)) {
 		struct fuse_args *args = &ff->release_args->args;
+		struct inode *inode = ff->release_args->inode;
+
+		if (inode)
+			fuse_file_io_release(ff, inode);
 
 		if (isdir ? 
ff->fm->fc->no_opendir : ff->fm->fc->no_open) {
 			/* Do nothing when client does not implement 'open' */
@@ -161,7 +328,7 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
 	}
 
 	if (isdir)
-		ff->open_flags &= ~FOPEN_DIRECT_IO;
+		ff->open_flags &= ~(FOPEN_DIRECT_IO | FOPEN_CACHE_IO);
 
 	ff->nodeid = nodeid;
 
@@ -199,6 +366,11 @@ int fuse_finish_open(struct inode *inode, struct file *file)
 {
 	struct fuse_file *ff = file->private_data;
 	struct fuse_conn *fc = get_fuse_conn(inode);
+	int err;
+
+	err = fuse_file_io_open(file, inode);
+	if (err)
+		return err;
 
 	if (ff->open_flags & FOPEN_STREAM)
 		stream_open(inode, file);
@@ -1320,6 +1492,7 @@ static bool fuse_dio_wr_exclusive_lock(struct kiocb *iocb, struct iov_iter *from
 	struct file *file = iocb->ki_filp;
 	struct fuse_file *ff = file->private_data;
 	struct inode *inode = file_inode(iocb->ki_filp);
+	struct fuse_inode *fi = get_fuse_inode(inode);
 
 	/* server side has to advise that it supports parallel dio writes */
 	if (!(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES))
@@ -1331,11 +1504,9 @@ static bool fuse_dio_wr_exclusive_lock(struct kiocb *iocb, struct iov_iter *from
 	if (iocb->ki_flags & IOCB_APPEND)
 		return true;
 
-	/* combination opf page access and direct-io difficult, shared
-	 * locks actually introduce a conflict.
-	 */
-	if (get_fuse_conn(inode)->direct_io_allow_mmap)
-		return true;
+	/* shared locks are not allowed with parallel page cache IO */
+	if (test_bit(FUSE_I_CACHE_IO_MODE, &fi->state))
+		return true;
 
 	/* parallel dio beyond eof is at least for now not supported */
 	if (fuse_io_past_eof(iocb, from))
@@ -1355,10 +1526,14 @@ static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from,
 	} else {
 		inode_lock_shared(inode);
 		/*
-		 * Previous check was without inode lock and might have raced,
-		 * check again.
+		 * New parallel dio is allowed only if the inode is not in
+		 * caching mode and it denies new opens in caching mode. This
+		 * check should be performed only after taking the shared
+		 * inode lock. The previous past eof check was done without
+		 * the inode lock and might have raced, so check it again.
 		 */
-		if (fuse_io_past_eof(iocb, from)) {
+		if (fuse_io_past_eof(iocb, from) ||
+		    fuse_file_uncached_io_start(inode) != 0) {
 			inode_unlock_shared(inode);
 			inode_lock(inode);
 			*exclusive = true;
@@ -1371,6 +1546,8 @@ static void fuse_dio_unlock(struct inode *inode, bool exclusive)
 	if (exclusive) {
 		inode_unlock(inode);
 	} else {
+		/* Allow opens in caching mode after the last parallel dio ends */
+		fuse_file_uncached_io_end(inode);
 		inode_unlock_shared(inode);
 	}
 }
@@ -2500,11 +2677,16 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct fuse_file *ff = file->private_data;
 	struct fuse_conn *fc = ff->fm->fc;
+	int rc;
 
 	/* DAX mmap is superior to direct_io mmap */
 	if (FUSE_IS_DAX(file_inode(file)))
 		return fuse_dax_mmap(file, vma);
 
+	/*
+	 * FOPEN_DIRECT_IO handling is special compared to O_DIRECT, as it
+	 * does not allow MAP_SHARED mmap without FUSE_DIRECT_IO_ALLOW_MMAP.
+	 */
 	if (ff->open_flags & FOPEN_DIRECT_IO) {
 		/*
 		 * Can't provide the coherency needed for MAP_SHARED
@@ -2515,10 +2697,23 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
 
 		invalidate_inode_pages2(file->f_mapping);
 
+		/*
+		 * First mmap of a direct_io file enters caching inode io mode.
+		 * Also waits for parallel dio writers to go into serial mode
+		 * (exclusive instead of shared lock).
+		 */
+		rc = fuse_file_io_mmap(ff, file_inode(file));
+		if (rc)
+			return rc;
+
 		if (!(vma->vm_flags & VM_MAYSHARE)) {
 			/* MAP_PRIVATE */
 			return generic_file_mmap(file, vma);
 		}
+	} else if (file->f_flags & O_DIRECT) {
+		rc = fuse_file_io_mmap(ff, file_inode(file));
+		if (rc)
+			return rc;
 	}
 
 	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
@@ -3287,7 +3482,9 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags)
 	INIT_LIST_HEAD(&fi->write_files);
 	INIT_LIST_HEAD(&fi->queued_writes);
 	fi->writectr = 0;
+	fi->iocachectr = 0;
 	init_waitqueue_head(&fi->page_waitq);
+	init_waitqueue_head(&fi->direct_io_waitq);
 	fi->writepages = RB_ROOT;
 
 	if (IS_ENABLED(CONFIG_FUSE_DAX))
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1c0cde4022f0..cb961f9a13c3 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -111,7 +111,7 @@ struct fuse_inode {
 	u64 attr_version;
 
 	union {
-		/* Write related fields (regular file only) */
+		/* read/write io cache (regular file only) */
 		struct {
 			/* Files usable in writepage.  Protected by fi->lock */
 			struct list_head write_files;
@@ -123,9 +123,15 @@ struct fuse_inode {
 			 * (FUSE_NOWRITE) means more writes are blocked */
 			int writectr;
 
+			/** Number of files/maps using page cache */
+			int iocachectr;
+
 			/* Waitq for writepage completion */
 			wait_queue_head_t page_waitq;
 
+			/* waitq for direct-io completion */
+			wait_queue_head_t direct_io_waitq;
+
 			/* List of writepage requestst (pending or sent) */
 			struct rb_root writepages;
 		};
@@ -187,6 +193,8 @@ enum {
 	FUSE_I_BAD,
 	/* Has btime */
 	FUSE_I_BTIME,
+	/* Wants or already has page cache IO */
+	FUSE_I_CACHE_IO_MODE,
 };
 
 struct fuse_conn;
@@ -246,6 +254,9 @@ struct fuse_file {
 
 	/** Has flock been performed on this file? */
 	bool flock:1;
+
+	/** Was file opened for io? */
+	bool io_opened:1;
 };
 
 /** One input argument of a request */
@@ -1349,6 +1360,72 @@ int fuse_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry,
 		      struct fileattr *fa);
 
 /* file.c */
+/*
+ * Request an open in caching mode.
+ * Return true if in caching mode.
+ */
+static inline bool fuse_inode_get_io_cache(struct fuse_inode *fi)
+{
+	assert_spin_locked(&fi->lock);
+	if (fi->iocachectr < 0)
+		return false;
+	fi->iocachectr++;
+	if (fi->iocachectr == 1)
+		set_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
+
+	return true;
+}
+
+/*
+ * Release an open in caching mode.
+ * Return true if no more files are open in caching mode.
+ */
+static inline bool fuse_inode_put_io_cache(struct fuse_inode *fi)
+{
+	assert_spin_locked(&fi->lock);
+	if (WARN_ON(fi->iocachectr <= 0))
+		return false;
+
+	if (--fi->iocachectr == 0) {
+		clear_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Request to deny new opens in caching mode.
+ * Return true if denying new opens in caching mode.
+ */
+static inline bool fuse_inode_deny_io_cache(struct fuse_inode *fi)
+{
+	assert_spin_locked(&fi->lock);
+	if (fi->iocachectr > 0)
+		return false;
+	fi->iocachectr--;
+	return true;
+}
+
+/*
+ * Release a request to deny opens in caching mode.
+ * Return true if allowing new opens in caching mode.
+ */
+static inline bool fuse_inode_allow_io_cache(struct fuse_inode *fi)
+{
+	assert_spin_locked(&fi->lock);
+	if (WARN_ON(fi->iocachectr >= 0))
+		return false;
+	return ++fi->iocachectr == 0;
+}
+
+/*
+ * Return true if allowing new opens in caching mode.
+ */ +static inline bool fuse_is_io_cache_allowed(struct fuse_inode *fi) +{ + return READ_ONCE(fi->iocachectr) >= 0; +} struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid, unsigned int open_flags, bool isdir); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index e7418d15fe39..66a4bd8d767d 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -353,6 +353,7 @@ struct fuse_file_lock { * FOPEN_STREAM: the file is stream-like (no file position at all) * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode + * FOPEN_CACHE_IO: using cache for this open file (incl. mmap on direct_io) */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) @@ -361,6 +362,7 @@ struct fuse_file_lock { #define FOPEN_STREAM (1 << 4) #define FOPEN_NOFLUSH (1 << 5) #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) +#define FOPEN_CACHE_IO (1 << 7) /** * INIT request/reply flags -- 2.40.1
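
To illustrate the iocachectr state machine that the helpers above
implement, here is a minimal user-space model. This is a sketch, not
kernel code: the helper names (cache_get, cache_put, dio_start, dio_end)
are hypothetical stand-ins for fuse_inode_{get,put,deny,allow}_io_cache,
a pthread mutex stands in for fi->lock, a condition variable stands in
for fi->direct_io_waitq, and the FUSE_I_CACHE_IO_MODE advisory bit and
the killable wait are omitted for brevity. Compile with gcc -pthread.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t dio_done = PTHREAD_COND_INITIALIZER;
static int iocachectr;	/* models fi->iocachectr: >0 caching, <0 parallel dio */

/* Caching open: wait until no parallel dio writes are in flight */
static void cache_get(void)
{
	pthread_mutex_lock(&lock);
	while (iocachectr < 0)			/* parallel dio in progress */
		pthread_cond_wait(&dio_done, &lock);
	iocachectr++;				/* enter caching io mode */
	pthread_mutex_unlock(&lock);
}

/* Caching close: last close leaves caching io mode */
static void cache_put(void)
{
	pthread_mutex_lock(&lock);
	iocachectr--;
	pthread_mutex_unlock(&lock);
}

/* Parallel dio write start: fails if any file is open in caching mode */
static bool dio_start(void)
{
	bool ok;

	pthread_mutex_lock(&lock);
	ok = iocachectr <= 0;
	if (ok)
		iocachectr--;			/* deny new caching opens */
	pthread_mutex_unlock(&lock);
	return ok;	/* on failure, the caller takes the exclusive lock */
}

/* Parallel dio write end: last writer re-allows caching opens */
static void dio_end(void)
{
	pthread_mutex_lock(&lock);
	if (++iocachectr == 0)			/* last dio write completed */
		pthread_cond_broadcast(&dio_done);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	/* caching opens and parallel dio writes are mutually exclusive */
	cache_get();
	printf("parallel dio allowed: %d\n", dio_start());	/* 0 */
	cache_put();
	printf("parallel dio allowed: %d\n", dio_start());	/* 1 */
	dio_end();
	return 0;
}

A caching open while the counter is negative blocks until the last
dio_end() broadcast, mirroring fuse_file_uncached_io_end() waking
fi->direct_io_waitq; a parallel dio write while the counter is positive
fails, mirroring fuse_file_uncached_io_start() returning -ETXTBSY and
the writer falling back to the exclusive inode lock.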