[PATCH] FUSE: add the async option for the flush/release operation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi, Miklos:

This patch adds the async option for the flush/release operation in FUSE.

The async flush/release option allows a FUSE-based application to terminate
without blocking in the flush/release operation, even in the presence of
complex external interactions. In addition, the async operation can be more
efficient when a large number of FUSE-based files are involved.

---
Deadlock Example:

    Process A is a multi-threaded application that interacts with Process B,
    a FUSE-server.


               UNIX-domain socket
    App (A)  -----------------------  FUSE-server (B)
       |                                   |
       |                                   |
       |                                   |
       +-----------------------------------+
               open/flush/release


    When the FUSE-server receives an open or a flush/release operation from
    Process A, it in turn interacts with Process A (e.g., to coordinate
    shared-memory allocation and de-allocation) over the connection-oriented
    UNIX-domain socket.

    A deadlock occurs when Process A is terminating:

      1) As part of process termination (i.e., do_exit() in the kernel),
         Process A sends "flush/release" to Process B and waits for the
         reply, because the operation is synchronous.

      2) When Process B receives the "flush/release" request, it in turn
         sends a message to Process A (over the UNIX-domain channel) and
         waits for its reply.

      3) Because Process A is terminating, it may not be able to reply to
         Process B, resulting in a deadlock.

   The async flush/release option offers a simple and robust solution to the
   deadlock issue.

   With the async flush/release operation, all the files and sockets in Process
   A can be closed without blocking. Closing Process A's end of the UNIX-domain
   socket in turn unblocks the operation in Process B that is waiting on it.
---

Signed-off-by: Enke Chen <enkechen@xxxxxxxxx>

Version: 4.7.0_next_20160805

 fs/fuse/file.c            |   39 +++++++++++++++++++++++++++------------
 fs/fuse/fuse_i.h          |    4 ++++
 fs/fuse/inode.c           |    4 +++-
 include/uapi/linux/fuse.h |    7 ++++++-
 4 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f394aff..7dd144f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -273,7 +273,8 @@ void fuse_release_common(struct file *file, int opcode)
 	 * synchronous RELEASE is allowed (and desirable) in this case
 	 * because the server can be trusted not to screw up.
 	 */
-	fuse_file_put(ff, ff->fc->destroy_req != NULL);
+	fuse_file_put(ff, (ff->fc->destroy_req != NULL) &&
+		      !ff->fc->async_flush);
 }
 
 static int fuse_open(struct inode *inode, struct file *file)
@@ -394,13 +395,19 @@ static void fuse_sync_writes(struct inode *inode)
 	fuse_release_nowrite(inode);
 }
 
+static void fuse_flush_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+	if (req->out.h.error == -ENOSYS)
+		fc->no_flush = 1;
+}
+
 static int fuse_flush(struct file *file, fl_owner_t id)
 {
 	struct inode *inode = file_inode(file);
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_file *ff = file->private_data;
 	struct fuse_req *req;
-	struct fuse_flush_in inarg;
+	struct fuse_flush_in *inarg;
 	int err;
 
 	if (is_bad_inode(inode))
@@ -423,20 +430,28 @@ static int fuse_flush(struct file *file, fl_owner_t id)
 
 	req = fuse_get_req_nofail_nopages(fc, file);
 	memset(&inarg, 0, sizeof(inarg));
-	inarg.fh = ff->fh;
-	inarg.lock_owner = fuse_lock_owner_id(fc, id);
+	inarg = &req->misc.flush_in;
+	inarg->fh = ff->fh;
+	inarg->lock_owner = fuse_lock_owner_id(fc, id);
 	req->in.h.opcode = FUSE_FLUSH;
 	req->in.h.nodeid = get_node_id(inode);
 	req->in.numargs = 1;
-	req->in.args[0].size = sizeof(inarg);
-	req->in.args[0].value = &inarg;
-	__set_bit(FR_FORCE, &req->flags);
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
-	if (err == -ENOSYS) {
-		fc->no_flush = 1;
+	req->in.args[0].size = sizeof(struct fuse_flush_in);
+	req->in.args[0].value = inarg;
+	if (fc->async_flush) {
+		req->end = fuse_flush_end;
+		__set_bit(FR_BACKGROUND, &req->flags);
+		fuse_request_send_background(fc, req);
 		err = 0;
+	} else {
+		__set_bit(FR_FORCE, &req->flags);
+		fuse_request_send(fc, req);
+		err = req->out.h.error;
+		fuse_put_request(fc, req);
+		if (err == -ENOSYS) {
+			fc->no_flush = 1;
+			err = 0;
+		}
 	}
 	return err;
 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d98d8cc..f212cdd 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -350,6 +350,7 @@ struct fuse_req {
 			struct fuse_req *next;
 		} write;
 		struct fuse_notify_retrieve_in retrieve_in;
+		struct fuse_flush_in flush_in;
 	} misc;
 
 	/** page vector */
@@ -624,6 +625,9 @@ struct fuse_conn {
 	/** Is lseek not implemented by fs? */
 	unsigned no_lseek:1;
 
+	/** Does the filesystem want async flush? */
+	unsigned async_flush:1;
+
 	/** The number of requests waiting for completion */
 	atomic_t num_waiting;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4e05b51..2d031b1 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -910,6 +910,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 				fc->writeback_cache = 1;
 			if (arg->flags & FUSE_PARALLEL_DIROPS)
 				fc->parallel_dirops = 1;
+			if (arg->flags & FUSE_ASYNC_FLUSH)
+				fc->async_flush = 1;
 			if (arg->time_gran && arg->time_gran <= 1000000000)
 				fc->sb->s_time_gran = arg->time_gran;
 		} else {
@@ -941,7 +943,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 		FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
 		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
 		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
-		FUSE_PARALLEL_DIROPS;
+		FUSE_PARALLEL_DIROPS | FUSE_ASYNC_FLUSH;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 27e1736..76087d3 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -108,6 +108,9 @@
  *
  *  7.25
  *  - add FUSE_PARALLEL_DIROPS
+ *
+ *  7.26
+ *  - add FUSE_ASYNC_FLUSH
  */
 
 #ifndef _LINUX_FUSE_H
@@ -143,7 +146,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 25
+#define FUSE_KERNEL_MINOR_VERSION 26
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -238,6 +241,7 @@ struct fuse_file_lock {
  * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
  * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens
  * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir
+ * FUSE_ASYNC_FLUSH: asynchronous flush and release
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -258,6 +262,7 @@ struct fuse_file_lock {
 #define FUSE_WRITEBACK_CACHE	(1 << 16)
 #define FUSE_NO_OPEN_SUPPORT	(1 << 17)
 #define FUSE_PARALLEL_DIROPS    (1 << 18)
+#define FUSE_ASYNC_FLUSH	(1 << 19)
 
 /**
  * CUSE INIT request/reply flags
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux