On Mon, Apr 25, 2022 at 09:52:44PM +0800, Jiachen Zhang wrote: > Some users may want both the high performance of writeback mode and a > little bit more consistency among FUSE mounts. In the current > writeback mode implementation, users of one FUSE mount can never see > the file expansion done by other FUSE mounts. Okay. Here's a preliminary patch that you could try. Thanks, Miklos --- fs/fuse/dir.c | 35 ++++++++++++++++++++++------------- fs/fuse/file.c | 17 +++++++++++++++-- fs/fuse/fuse_i.h | 14 +++++++++++++- fs/fuse/inode.c | 32 +++++++++++++++++++++++++++----- include/uapi/linux/fuse.h | 5 +++++ 5 files changed, 82 insertions(+), 21 deletions(-) --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -194,6 +194,7 @@ * - add FUSE_SECURITY_CTX init flag * - add security context to create, mkdir, symlink, and mknod requests * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX + * - add FUSE_WRITEBACK_CACHE_V2 init flag */ #ifndef _LINUX_FUSE_H @@ -353,6 +354,9 @@ struct fuse_file_lock { * FUSE_SECURITY_CTX: add security context to create, mkdir, symlink, and * mknod * FUSE_HAS_INODE_DAX: use per inode DAX + * FUSE_WRITEBACK_CACHE_V2: + * - allow time/size to be refreshed if no pending write + * - time/size not cached for falocate/copy_file_range */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -389,6 +393,7 @@ struct fuse_file_lock { /* bits 32..63 get shifted down 32 bits into the flags2 field */ #define FUSE_SECURITY_CTX (1ULL << 32) #define FUSE_HAS_INODE_DAX (1ULL << 33) +#define FUSE_WRITEBACK_CACHE_V2 (1ULL << 34) /** * CUSE INIT request/reply flags --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -222,19 +222,37 @@ void fuse_change_attributes_common(struc u32 fuse_get_cache_mask(struct inode *inode) { struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) return 0; + /* + * In writeback_cache_v2 mode if all the following conditions are met, + * then allow the attributes to be refreshed: + * + * - inode is not dirty (I_DIRTY_INODE) + * - inode is not in the process of being written (I_SYNC) + * - inode has no dirty pages (I_DIRTY_PAGES) + * - inode does not have any page writeback in progress + * + * Note: checking PAGECACHE_TAG_WRITEBACK is not sufficient in fuse, + * since inode can appear to have no PageWriteback pages, yet still have + * outstanding write request. + */ + if (fc->writeback_cache_v2 && !(inode->i_state & (I_DIRTY | I_SYNC)) && + RB_EMPTY_ROOT(&fi->writepages)) + return 0; + return STATX_MTIME | STATX_CTIME | STATX_SIZE; } -void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid, u64 attr_version) +void fuse_change_attributes_mask(struct inode *inode, struct fuse_attr *attr, + u64 attr_valid, u64 attr_version, + u32 cache_mask) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - u32 cache_mask; loff_t oldsize; struct timespec64 old_mtime; @@ -244,7 +262,7 @@ void fuse_change_attributes(struct inode * may update i_size. In these cases trust the cached value in the * inode. */ - cache_mask = fuse_get_cache_mask(inode); + cache_mask |= fuse_get_cache_mask(inode); if (cache_mask & STATX_SIZE) attr->size = i_size_read(inode); @@ -1153,6 +1171,10 @@ static void process_init_reply(struct fu fc->async_dio = 1; if (flags & FUSE_WRITEBACK_CACHE) fc->writeback_cache = 1; + if (flags & FUSE_WRITEBACK_CACHE_V2) { + fc->writeback_cache = 1; + fc->writeback_cache_v2 = 1; + } if (flags & FUSE_PARALLEL_DIROPS) fc->parallel_dirops = 1; if (flags & FUSE_HANDLE_KILLPRIV) @@ -1234,7 +1256,7 @@ void fuse_send_init(struct fuse_mount *f FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | - FUSE_SECURITY_CTX; + FUSE_SECURITY_CTX | FUSE_WRITEBACK_CACHE_V2; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) flags |= FUSE_MAP_ALIGNMENT; --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -213,6 +213,7 @@ static int fuse_dentry_revalidate(struct FUSE_ARGS(args); struct fuse_forget_link *forget; u64 attr_version; + u32 cache_mask; /* For negative dentries, always do a fresh lookup */ if (!inode) @@ -230,6 +231,7 @@ static int fuse_dentry_revalidate(struct goto out; attr_version = fuse_get_attr_version(fm->fc); + cache_mask = fuse_get_cache_mask(inode); parent = dget_parent(entry); fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)), @@ -259,9 +261,9 @@ static int fuse_dentry_revalidate(struct goto invalid; forget_all_cached_acls(inode); - fuse_change_attributes(inode, &outarg.attr, - entry_attr_timeout(&outarg), - attr_version); + fuse_change_attributes_mask(inode, &outarg.attr, + entry_attr_timeout(&outarg), + attr_version, cache_mask); fuse_change_entry_timeout(entry, &outarg); } else if (inode) { fi = get_fuse_inode(inode); @@ -836,16 +838,23 @@ static int fuse_symlink(struct user_name void fuse_flush_time_update(struct inode *inode) { - int err = sync_inode_metadata(inode, 1); + struct fuse_conn *fc = get_fuse_conn(inode); + int err; - mapping_set_error(inode->i_mapping, err); + if (!fc->writeback_cache_v2) { + err = sync_inode_metadata(inode, 1); + mapping_set_error(inode->i_mapping, err); + } } static void fuse_update_ctime_in_cache(struct inode *inode) { if (!IS_NOCMTIME(inode)) { + struct fuse_conn *fc = get_fuse_conn(inode); + inode->i_ctime = current_time(inode); - mark_inode_dirty_sync(inode); + if (!fc->writeback_cache_v2) + mark_inode_dirty_sync(inode); fuse_flush_time_update(inode); } } @@ -1065,7 +1074,7 @@ static void fuse_fillattr(struct inode * } static int fuse_do_getattr(struct inode *inode, struct kstat *stat, - struct file *file) + struct file *file, u32 cache_mask) { int err; struct fuse_getattr_in inarg; @@ -1100,9 +1109,9 @@ static int fuse_do_getattr(struct inode fuse_make_bad(inode); err = -EIO; } else { - fuse_change_attributes(inode, &outarg.attr, - attr_timeout(&outarg), - attr_version); + fuse_change_attributes_mask(inode, &outarg.attr, + attr_timeout(&outarg), + attr_version, cache_mask); if (stat) fuse_fillattr(inode, &outarg.attr, stat); } @@ -1131,7 +1140,7 @@ static int fuse_update_get_attr(struct i if (sync) { forget_all_cached_acls(inode); - err = fuse_do_getattr(inode, stat, file); + err = fuse_do_getattr(inode, stat, file, cache_mask); } else if (stat) { generic_fillattr(&init_user_ns, inode, stat); stat->mode = fi->orig_i_mode; @@ -1277,7 +1286,7 @@ static int fuse_perm_getattr(struct inod return -ECHILD; forget_all_cached_acls(inode); - return fuse_do_getattr(inode, NULL, NULL); + return fuse_do_getattr(inode, NULL, NULL, 0); } /* @@ -1833,7 +1842,7 @@ static int fuse_setattr(struct user_name * ia_mode calculation may have used stale i_mode. * Refresh and recalculate. */ - ret = fuse_do_getattr(inode, NULL, file); + ret = fuse_do_getattr(inode, NULL, file, 0); if (ret) return ret; --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2949,6 +2949,19 @@ static int fuse_writeback_range(struct i return err; } +static void fuse_update_time(struct file *file) +{ + struct inode *inode = file_inode(file); + struct fuse_conn *fc = get_fuse_conn(inode); + + if (!IS_NOCMTIME(inode)) { + if (fc->writeback_cache_v2) + inode->i_mtime = inode->i_ctime = current_time(inode); + else + file_update_time(file); + } +} + static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, loff_t length) { @@ -3021,7 +3034,7 @@ static long fuse_file_fallocate(struct f /* we could have extended the file */ if (!(mode & FALLOC_FL_KEEP_SIZE)) { if (fuse_write_update_attr(inode, offset + length, length)) - file_update_time(file); + fuse_update_time(file); } if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) @@ -3135,7 +3148,7 @@ static ssize_t __fuse_copy_file_range(st ALIGN_DOWN(pos_out, PAGE_SIZE), ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1); - file_update_time(file_out); + fuse_update_time(file_out); fuse_write_update_attr(inode_out, pos_out + outarg.size, outarg.size); err = outarg.size; --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -654,6 +654,9 @@ struct fuse_conn { /* show legacy mount options */ unsigned int legacy_opts_show:1; + /* Improved writeback cache policy */ + unsigned writeback_cache_v2:1; + /* * fs kills suid/sgid/cap on write/chown/trunc. suid is killed on * write/trunc only if caller did not have CAP_FSETID. sgid is killed @@ -1049,8 +1052,17 @@ void fuse_init_symlink(struct inode *ino /** * Change attributes of an inode */ +void fuse_change_attributes_mask(struct inode *inode, struct fuse_attr *attr, + u64 attr_valid, u64 attr_version, + u32 cache_mask); + +static inline void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid, u64 attr_version); + u64 attr_valid, u64 attr_version) +{ + return fuse_change_attributes_mask(inode, attr, + attr_valid, attr_version, 0); +} void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, u64 attr_valid, u32 cache_mask);