On Tue, Apr 26, 2022 at 9:09 PM Miklos Szeredi <miklos@xxxxxxxxxx> wrote: > > On Mon, Apr 25, 2022 at 09:52:44PM +0800, Jiachen Zhang wrote: > > > Some users may want both the high performance of writeback mode and a > > little bit more consistency among FUSE mounts. In the current > > writeback mode implementation, users of one FUSE mount can never see > > the file expansion done by other FUSE mounts. > > Okay. > > Here's a preliminary patch that you could try. > > Thanks, > Miklos > Hi, Miklos, Thanks for the patch, and sorry for the late reply. I have already tried on an older kernel version based on the same idea of your patch, and it works fine for some simple manual tests. I still have some questions about this patch. > --- > fs/fuse/dir.c | 35 ++++++++++++++++++++++------------- > fs/fuse/file.c | 17 +++++++++++++++-- > fs/fuse/fuse_i.h | 14 +++++++++++++- > fs/fuse/inode.c | 32 +++++++++++++++++++++++++++----- > include/uapi/linux/fuse.h | 5 +++++ > 5 files changed, 82 insertions(+), 21 deletions(-) > > --- a/include/uapi/linux/fuse.h > +++ b/include/uapi/linux/fuse.h > @@ -194,6 +194,7 @@ > * - add FUSE_SECURITY_CTX init flag > * - add security context to create, mkdir, symlink, and mknod requests > * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX > + * - add FUSE_WRITEBACK_CACHE_V2 init flag > */ > > #ifndef _LINUX_FUSE_H > @@ -353,6 +354,9 @@ struct fuse_file_lock { > * FUSE_SECURITY_CTX: add security context to create, mkdir, symlink, and > * mknod > * FUSE_HAS_INODE_DAX: use per inode DAX > + * FUSE_WRITEBACK_CACHE_V2: > + * - allow time/size to be refreshed if no pending write > + * - time/size not cached for falocate/copy_file_range > */ > #define FUSE_ASYNC_READ (1 << 0) > #define FUSE_POSIX_LOCKS (1 << 1) > @@ -389,6 +393,7 @@ struct fuse_file_lock { > /* bits 32..63 get shifted down 32 bits into the flags2 field */ > #define FUSE_SECURITY_CTX (1ULL << 32) > #define FUSE_HAS_INODE_DAX (1ULL << 33) > +#define FUSE_WRITEBACK_CACHE_V2 (1ULL << 34) > > /** > * CUSE INIT request/reply flags > --- a/fs/fuse/inode.c > +++ b/fs/fuse/inode.c > @@ -222,19 +222,37 @@ void fuse_change_attributes_common(struc > u32 fuse_get_cache_mask(struct inode *inode) > { > struct fuse_conn *fc = get_fuse_conn(inode); > + struct fuse_inode *fi = get_fuse_inode(inode); > > if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) > return 0; > > + /* > + * In writeback_cache_v2 mode if all the following conditions are met, > + * then allow the attributes to be refreshed: > + * > + * - inode is not dirty (I_DIRTY_INODE) > + * - inode is not in the process of being written (I_SYNC) > + * - inode has no dirty pages (I_DIRTY_PAGES) > + * - inode does not have any page writeback in progress > + * > + * Note: checking PAGECACHE_TAG_WRITEBACK is not sufficient in fuse, > + * since inode can appear to have no PageWriteback pages, yet still have > + * outstanding write request. > + */ > + if (fc->writeback_cache_v2 && !(inode->i_state & (I_DIRTY | I_SYNC)) && > + RB_EMPTY_ROOT(&fi->writepages)) > + return 0; > + Do we need to lock the inode between this cleanness checking and attr updating? There may be writes between the two procedures and we should not update the attributes in such cases. > return STATX_MTIME | STATX_CTIME | STATX_SIZE; > } > > -void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, > - u64 attr_valid, u64 attr_version) > +void fuse_change_attributes_mask(struct inode *inode, struct fuse_attr *attr, > + u64 attr_valid, u64 attr_version, > + u32 cache_mask) > { > struct fuse_conn *fc = get_fuse_conn(inode); > struct fuse_inode *fi = get_fuse_inode(inode); > - u32 cache_mask; > loff_t oldsize; > struct timespec64 old_mtime; > > @@ -244,7 +262,7 @@ void fuse_change_attributes(struct inode > * may update i_size. In these cases trust the cached value in the > * inode. > */ > - cache_mask = fuse_get_cache_mask(inode); > + cache_mask |= fuse_get_cache_mask(inode); Could we get the cache_mask only based on the fuse_get_cache_mask() in fuse_change_attributes()? Why the fuse_get_cache_mask() should be called multiple times in fuse_dentry_revalidate() and fuse_do_setattr() cases? > if (cache_mask & STATX_SIZE) > attr->size = i_size_read(inode); > > @@ -1153,6 +1171,10 @@ static void process_init_reply(struct fu > fc->async_dio = 1; > if (flags & FUSE_WRITEBACK_CACHE) > fc->writeback_cache = 1; > + if (flags & FUSE_WRITEBACK_CACHE_V2) { > + fc->writeback_cache = 1; > + fc->writeback_cache_v2 = 1; > + } > if (flags & FUSE_PARALLEL_DIROPS) > fc->parallel_dirops = 1; > if (flags & FUSE_HANDLE_KILLPRIV) > @@ -1234,7 +1256,7 @@ void fuse_send_init(struct fuse_mount *f > FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | > FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | > FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | > - FUSE_SECURITY_CTX; > + FUSE_SECURITY_CTX | FUSE_WRITEBACK_CACHE_V2; > #ifdef CONFIG_FUSE_DAX > if (fm->fc->dax) > flags |= FUSE_MAP_ALIGNMENT; > --- a/fs/fuse/dir.c > +++ b/fs/fuse/dir.c > @@ -213,6 +213,7 @@ static int fuse_dentry_revalidate(struct > FUSE_ARGS(args); > struct fuse_forget_link *forget; > u64 attr_version; > + u32 cache_mask; > > /* For negative dentries, always do a fresh lookup */ > if (!inode) > @@ -230,6 +231,7 @@ static int fuse_dentry_revalidate(struct > goto out; > > attr_version = fuse_get_attr_version(fm->fc); > + cache_mask = fuse_get_cache_mask(inode); > > parent = dget_parent(entry); > fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)), > @@ -259,9 +261,9 @@ static int fuse_dentry_revalidate(struct > goto invalid; > > forget_all_cached_acls(inode); > - fuse_change_attributes(inode, &outarg.attr, > - entry_attr_timeout(&outarg), > - attr_version); > + fuse_change_attributes_mask(inode, &outarg.attr, > + entry_attr_timeout(&outarg), > + attr_version, cache_mask); > fuse_change_entry_timeout(entry, &outarg); > } else if (inode) { > fi = get_fuse_inode(inode); > @@ -836,16 +838,23 @@ static int fuse_symlink(struct user_name > > void fuse_flush_time_update(struct inode *inode) > { > - int err = sync_inode_metadata(inode, 1); > + struct fuse_conn *fc = get_fuse_conn(inode); > + int err; > > - mapping_set_error(inode->i_mapping, err); > + if (!fc->writeback_cache_v2) { > + err = sync_inode_metadata(inode, 1); > + mapping_set_error(inode->i_mapping, err); > + } > } > > static void fuse_update_ctime_in_cache(struct inode *inode) > { > if (!IS_NOCMTIME(inode)) { > + struct fuse_conn *fc = get_fuse_conn(inode); > + > inode->i_ctime = current_time(inode); > - mark_inode_dirty_sync(inode); > + if (!fc->writeback_cache_v2) > + mark_inode_dirty_sync(inode); > fuse_flush_time_update(inode); > } > } > @@ -1065,7 +1074,7 @@ static void fuse_fillattr(struct inode * > } > > static int fuse_do_getattr(struct inode *inode, struct kstat *stat, > - struct file *file) > + struct file *file, u32 cache_mask) > { > int err; > struct fuse_getattr_in inarg; > @@ -1100,9 +1109,9 @@ static int fuse_do_getattr(struct inode > fuse_make_bad(inode); > err = -EIO; > } else { > - fuse_change_attributes(inode, &outarg.attr, > - attr_timeout(&outarg), > - attr_version); > + fuse_change_attributes_mask(inode, &outarg.attr, > + attr_timeout(&outarg), > + attr_version, cache_mask); > if (stat) > fuse_fillattr(inode, &outarg.attr, stat); > } > @@ -1131,7 +1140,7 @@ static int fuse_update_get_attr(struct i > > if (sync) { > forget_all_cached_acls(inode); > - err = fuse_do_getattr(inode, stat, file); > + err = fuse_do_getattr(inode, stat, file, cache_mask); > } else if (stat) { > generic_fillattr(&init_user_ns, inode, stat); > stat->mode = fi->orig_i_mode; > @@ -1277,7 +1286,7 @@ static int fuse_perm_getattr(struct inod > return -ECHILD; > > forget_all_cached_acls(inode); > - return fuse_do_getattr(inode, NULL, NULL); > + return fuse_do_getattr(inode, NULL, NULL, 0); > } > > /* > @@ -1833,7 +1842,7 @@ static int fuse_setattr(struct user_name > * ia_mode calculation may have used stale i_mode. > * Refresh and recalculate. > */ > - ret = fuse_do_getattr(inode, NULL, file); > + ret = fuse_do_getattr(inode, NULL, file, 0); > if (ret) > return ret; > > --- a/fs/fuse/file.c > +++ b/fs/fuse/file.c > @@ -2949,6 +2949,19 @@ static int fuse_writeback_range(struct i > return err; > } > > +static void fuse_update_time(struct file *file) > +{ > + struct inode *inode = file_inode(file); > + struct fuse_conn *fc = get_fuse_conn(inode); > + > + if (!IS_NOCMTIME(inode)) { > + if (fc->writeback_cache_v2) > + inode->i_mtime = inode->i_ctime = current_time(inode); > + else > + file_update_time(file); > + } > +} > + > static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, > loff_t length) > { > @@ -3021,7 +3034,7 @@ static long fuse_file_fallocate(struct f > /* we could have extended the file */ > if (!(mode & FALLOC_FL_KEEP_SIZE)) { > if (fuse_write_update_attr(inode, offset + length, length)) > - file_update_time(file); > + fuse_update_time(file); > } > > if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) > @@ -3135,7 +3148,7 @@ static ssize_t __fuse_copy_file_range(st > ALIGN_DOWN(pos_out, PAGE_SIZE), > ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1); > > - file_update_time(file_out); > + fuse_update_time(file_out); To avoid setting I_DIRTY_SYNC in writeback_cache_v2 mode, maybe we should replace all file_update_time() in fuse by fuse_update_time(). There are also file_update_time()s in fuse_page_mkwrite(), fuse_cache_write_iter(), and fuse_finish_open(). > fuse_write_update_attr(inode_out, pos_out + outarg.size, outarg.size); > > err = outarg.size; > --- a/fs/fuse/fuse_i.h > +++ b/fs/fuse/fuse_i.h > @@ -654,6 +654,9 @@ struct fuse_conn { > /* show legacy mount options */ > unsigned int legacy_opts_show:1; > > + /* Improved writeback cache policy */ > + unsigned writeback_cache_v2:1; > + > /* > * fs kills suid/sgid/cap on write/chown/trunc. suid is killed on > * write/trunc only if caller did not have CAP_FSETID. sgid is killed > @@ -1049,8 +1052,17 @@ void fuse_init_symlink(struct inode *ino > /** > * Change attributes of an inode > */ > +void fuse_change_attributes_mask(struct inode *inode, struct fuse_attr *attr, > + u64 attr_valid, u64 attr_version, > + u32 cache_mask); > + > +static inline > void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, > - u64 attr_valid, u64 attr_version); > + u64 attr_valid, u64 attr_version) > +{ > + return fuse_change_attributes_mask(inode, attr, > + attr_valid, attr_version, 0); > +} > > void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, > u64 attr_valid, u32 cache_mask); Also, maybe we can add a FOPEN_INVAL_ATTR flag [1] to invalidate the attr cache on file open. With the writeback v2 patch and FOPEN_INVAL_ATTR, the close-to-open consistency revalidations can be implemented in user-space fuse daemons. [1] https://lore.kernel.org/linux-fsdevel/20220608104202.19461-1-zhangjiachen.jaycee@xxxxxxxxxxxxx/ Thanks, Jiachen