Re: Re: Re: [RFC PATCH] fuse: support cache revalidation in writeback_cache mode

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Apr 25, 2022 at 09:52:44PM +0800, Jiachen Zhang wrote:

> Some users may want both the high performance of writeback mode and a
> little bit more consistency among FUSE mounts. In the current
> writeback mode implementation, users of one FUSE mount can never see
> the file expansion done by other FUSE mounts.

Okay.

Here's a preliminary patch that you could try.

Thanks,
Miklos

---
 fs/fuse/dir.c             |   35 ++++++++++++++++++++++-------------
 fs/fuse/file.c            |   17 +++++++++++++++--
 fs/fuse/fuse_i.h          |   14 +++++++++++++-
 fs/fuse/inode.c           |   32 +++++++++++++++++++++++++++-----
 include/uapi/linux/fuse.h |    5 +++++
 5 files changed, 82 insertions(+), 21 deletions(-)

--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -194,6 +194,7 @@
  *  - add FUSE_SECURITY_CTX init flag
  *  - add security context to create, mkdir, symlink, and mknod requests
  *  - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX
+ *  - add FUSE_WRITEBACK_CACHE_V2 init flag
  */
 
 #ifndef _LINUX_FUSE_H
@@ -353,6 +354,9 @@ struct fuse_file_lock {
  * FUSE_SECURITY_CTX:	add security context to create, mkdir, symlink, and
  *			mknod
  * FUSE_HAS_INODE_DAX:  use per inode DAX
+ * FUSE_WRITEBACK_CACHE_V2:
+ *			- allow time/size to be refreshed if no pending write
+ *			- time/size not cached for fallocate/copy_file_range
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -389,6 +393,7 @@ struct fuse_file_lock {
 /* bits 32..63 get shifted down 32 bits into the flags2 field */
 #define FUSE_SECURITY_CTX	(1ULL << 32)
 #define FUSE_HAS_INODE_DAX	(1ULL << 33)
+#define FUSE_WRITEBACK_CACHE_V2	(1ULL << 34)
 
 /**
  * CUSE INIT request/reply flags
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -222,19 +222,37 @@ void fuse_change_attributes_common(struc
 u32 fuse_get_cache_mask(struct inode *inode)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_inode *fi = get_fuse_inode(inode);
 
 	if (!fc->writeback_cache || !S_ISREG(inode->i_mode))
 		return 0;
 
+	/*
+	 * In writeback_cache_v2 mode if all the following conditions are met,
+	 * then allow the attributes to be refreshed:
+	 *
+	 * - inode is not dirty (I_DIRTY_INODE)
+	 * - inode is not in the process of being written (I_SYNC)
+	 * - inode has no dirty pages (I_DIRTY_PAGES)
+	 * - inode does not have any page writeback in progress
+	 *
+	 * Note: checking PAGECACHE_TAG_WRITEBACK is not sufficient in fuse,
+	 * since an inode can appear to have no PageWriteback pages, yet still
+	 * have outstanding write requests.
+	 */
+	if (fc->writeback_cache_v2 && !(inode->i_state & (I_DIRTY | I_SYNC)) &&
+	    RB_EMPTY_ROOT(&fi->writepages))
+		return 0;
+
 	return STATX_MTIME | STATX_CTIME | STATX_SIZE;
 }
 
-void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
-			    u64 attr_valid, u64 attr_version)
+void fuse_change_attributes_mask(struct inode *inode, struct fuse_attr *attr,
+				 u64 attr_valid, u64 attr_version,
+				 u32 cache_mask)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
-	u32 cache_mask;
 	loff_t oldsize;
 	struct timespec64 old_mtime;
 
@@ -244,7 +262,7 @@ void fuse_change_attributes(struct inode
 	 * may update i_size.  In these cases trust the cached value in the
 	 * inode.
 	 */
-	cache_mask = fuse_get_cache_mask(inode);
+	cache_mask |= fuse_get_cache_mask(inode);
 	if (cache_mask & STATX_SIZE)
 		attr->size = i_size_read(inode);
 
@@ -1153,6 +1171,10 @@ static void process_init_reply(struct fu
 				fc->async_dio = 1;
 			if (flags & FUSE_WRITEBACK_CACHE)
 				fc->writeback_cache = 1;
+			if (flags & FUSE_WRITEBACK_CACHE_V2) {
+				fc->writeback_cache = 1;
+				fc->writeback_cache_v2 = 1;
+			}
 			if (flags & FUSE_PARALLEL_DIROPS)
 				fc->parallel_dirops = 1;
 			if (flags & FUSE_HANDLE_KILLPRIV)
@@ -1234,7 +1256,7 @@ void fuse_send_init(struct fuse_mount *f
 		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
 		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
 		FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
-		FUSE_SECURITY_CTX;
+		FUSE_SECURITY_CTX | FUSE_WRITEBACK_CACHE_V2;
 #ifdef CONFIG_FUSE_DAX
 	if (fm->fc->dax)
 		flags |= FUSE_MAP_ALIGNMENT;
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -213,6 +213,7 @@ static int fuse_dentry_revalidate(struct
 		FUSE_ARGS(args);
 		struct fuse_forget_link *forget;
 		u64 attr_version;
+		u32 cache_mask;
 
 		/* For negative dentries, always do a fresh lookup */
 		if (!inode)
@@ -230,6 +231,7 @@ static int fuse_dentry_revalidate(struct
 			goto out;
 
 		attr_version = fuse_get_attr_version(fm->fc);
+		cache_mask = fuse_get_cache_mask(inode);
 
 		parent = dget_parent(entry);
 		fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
@@ -259,9 +261,9 @@ static int fuse_dentry_revalidate(struct
 			goto invalid;
 
 		forget_all_cached_acls(inode);
-		fuse_change_attributes(inode, &outarg.attr,
-				       entry_attr_timeout(&outarg),
-				       attr_version);
+		fuse_change_attributes_mask(inode, &outarg.attr,
+					    entry_attr_timeout(&outarg),
+					    attr_version, cache_mask);
 		fuse_change_entry_timeout(entry, &outarg);
 	} else if (inode) {
 		fi = get_fuse_inode(inode);
@@ -836,16 +838,23 @@ static int fuse_symlink(struct user_name
 
 void fuse_flush_time_update(struct inode *inode)
 {
-	int err = sync_inode_metadata(inode, 1);
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	int err;
 
-	mapping_set_error(inode->i_mapping, err);
+	if (!fc->writeback_cache_v2) {
+		err = sync_inode_metadata(inode, 1);
+		mapping_set_error(inode->i_mapping, err);
+	}
 }
 
 static void fuse_update_ctime_in_cache(struct inode *inode)
 {
 	if (!IS_NOCMTIME(inode)) {
+		struct fuse_conn *fc = get_fuse_conn(inode);
+
 		inode->i_ctime = current_time(inode);
-		mark_inode_dirty_sync(inode);
+		if (!fc->writeback_cache_v2)
+			mark_inode_dirty_sync(inode);
 		fuse_flush_time_update(inode);
 	}
 }
@@ -1065,7 +1074,7 @@ static void fuse_fillattr(struct inode *
 }
 
 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
-			   struct file *file)
+			   struct file *file, u32 cache_mask)
 {
 	int err;
 	struct fuse_getattr_in inarg;
@@ -1100,9 +1109,9 @@ static int fuse_do_getattr(struct inode
 			fuse_make_bad(inode);
 			err = -EIO;
 		} else {
-			fuse_change_attributes(inode, &outarg.attr,
-					       attr_timeout(&outarg),
-					       attr_version);
+			fuse_change_attributes_mask(inode, &outarg.attr,
+						    attr_timeout(&outarg),
+						    attr_version, cache_mask);
 			if (stat)
 				fuse_fillattr(inode, &outarg.attr, stat);
 		}
@@ -1131,7 +1140,7 @@ static int fuse_update_get_attr(struct i
 
 	if (sync) {
 		forget_all_cached_acls(inode);
-		err = fuse_do_getattr(inode, stat, file);
+		err = fuse_do_getattr(inode, stat, file, cache_mask);
 	} else if (stat) {
 		generic_fillattr(&init_user_ns, inode, stat);
 		stat->mode = fi->orig_i_mode;
@@ -1277,7 +1286,7 @@ static int fuse_perm_getattr(struct inod
 		return -ECHILD;
 
 	forget_all_cached_acls(inode);
-	return fuse_do_getattr(inode, NULL, NULL);
+	return fuse_do_getattr(inode, NULL, NULL, 0);
 }
 
 /*
@@ -1833,7 +1842,7 @@ static int fuse_setattr(struct user_name
 			 * ia_mode calculation may have used stale i_mode.
 			 * Refresh and recalculate.
 			 */
-			ret = fuse_do_getattr(inode, NULL, file);
+			ret = fuse_do_getattr(inode, NULL, file, 0);
 			if (ret)
 				return ret;
 
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2949,6 +2949,19 @@ static int fuse_writeback_range(struct i
 	return err;
 }
 
+static void fuse_update_time(struct file *file)
+{
+	struct inode *inode = file_inode(file);
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	if (!IS_NOCMTIME(inode)) {
+		if (fc->writeback_cache_v2)
+			inode->i_mtime = inode->i_ctime = current_time(inode);
+		else
+			file_update_time(file);
+	}
+}
+
 static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
 				loff_t length)
 {
@@ -3021,7 +3034,7 @@ static long fuse_file_fallocate(struct f
 	/* we could have extended the file */
 	if (!(mode & FALLOC_FL_KEEP_SIZE)) {
 		if (fuse_write_update_attr(inode, offset + length, length))
-			file_update_time(file);
+			fuse_update_time(file);
 	}
 
 	if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
@@ -3135,7 +3148,7 @@ static ssize_t __fuse_copy_file_range(st
 				   ALIGN_DOWN(pos_out, PAGE_SIZE),
 				   ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1);
 
-	file_update_time(file_out);
+	fuse_update_time(file_out);
 	fuse_write_update_attr(inode_out, pos_out + outarg.size, outarg.size);
 
 	err = outarg.size;
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -654,6 +654,9 @@ struct fuse_conn {
 	/* show legacy mount options */
 	unsigned int legacy_opts_show:1;
 
+	/* Improved writeback cache policy */
+	unsigned writeback_cache_v2:1;
+
 	/*
 	 * fs kills suid/sgid/cap on write/chown/trunc. suid is killed on
 	 * write/trunc only if caller did not have CAP_FSETID.  sgid is killed
@@ -1049,8 +1052,17 @@ void fuse_init_symlink(struct inode *ino
 /**
  * Change attributes of an inode
  */
+void fuse_change_attributes_mask(struct inode *inode, struct fuse_attr *attr,
+				 u64 attr_valid, u64 attr_version,
+				 u32 cache_mask);
+
+static inline
 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
-			    u64 attr_valid, u64 attr_version);
+			    u64 attr_valid, u64 attr_version)
+{
+	return fuse_change_attributes_mask(inode, attr,
+					   attr_valid, attr_version, 0);
+}
 
 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 				   u64 attr_valid, u32 cache_mask);



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux