Hi, Miklos:

This patch adds the async option for the flush/release operation in FUSE.

The async flush/release option allows a FUSE-based application to be terminated without being blocked in the flush/release operation, even in the presence of complex external interactions. In addition, the async operation can be more efficient when a large number of FUSE-based files are involved.

---
Deadlock Example:

Process A is a multi-threaded application that interacts with Process B, a FUSE-server.

                 UNIX-domain socket
    App (A) ----------------------- FUSE-server (B)
       |                                   |
       |                                   |
       |                                   |
       +-----------------------------------+
                open/flush/release

When the FUSE-server receives open and flush/release operations from Process A, it in turn interacts with Process A (e.g., coordinating shared memory allocation and de-allocation) using the connection-oriented UNIX-domain socket.

A deadlock occurs when Process A is terminating:

1) As part of process termination (i.e., do_exit() in the kernel), the kernel sends "flush/release" to Process B on behalf of Process A, and waits for its reply due to the synchronous nature of the operation.

2) When Process B receives the "flush/release" request, it in turn sends a message to Process A (over the UNIX-domain channel) and waits for its reply.

3) As Process A is terminating, it may not be able to reply to Process B, resulting in a deadlock.

The async flush/release option offers a simple and robust solution to the deadlock issue. With the async flush/release operation, all the files and sockets in Process A can be closed without being blocked, which in turn un-blocks the operation in Process B that is waiting on the UNIX-domain socket.
--- Signed-off-by: Enke Chen <enkechen@xxxxxxxxx> Version: 4.7.0_next_20160805 fs/fuse/file.c | 39 +++++++++++++++++++++++++++------------ fs/fuse/fuse_i.h | 4 ++++ fs/fuse/inode.c | 4 +++- include/uapi/linux/fuse.h | 7 ++++++- 4 files changed, 40 insertions(+), 14 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f394aff..7dd144f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -273,7 +273,8 @@ void fuse_release_common(struct file *file, int opcode) * synchronous RELEASE is allowed (and desirable) in this case * because the server can be trusted not to screw up. */ - fuse_file_put(ff, ff->fc->destroy_req != NULL); + fuse_file_put(ff, (ff->fc->destroy_req != NULL) && + !ff->fc->async_flush); } static int fuse_open(struct inode *inode, struct file *file) @@ -394,13 +395,19 @@ static void fuse_sync_writes(struct inode *inode) fuse_release_nowrite(inode); } +static void fuse_flush_end(struct fuse_conn *fc, struct fuse_req *req) +{ + if (req->out.h.error == -ENOSYS) + fc->no_flush = 1; +} + static int fuse_flush(struct file *file, fl_owner_t id) { struct inode *inode = file_inode(file); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_file *ff = file->private_data; struct fuse_req *req; - struct fuse_flush_in inarg; + struct fuse_flush_in *inarg; int err; if (is_bad_inode(inode)) @@ -423,20 +430,28 @@ static int fuse_flush(struct file *file, fl_owner_t id) req = fuse_get_req_nofail_nopages(fc, file); memset(&inarg, 0, sizeof(inarg)); - inarg.fh = ff->fh; - inarg.lock_owner = fuse_lock_owner_id(fc, id); + inarg = &req->misc.flush_in; + inarg->fh = ff->fh; + inarg->lock_owner = fuse_lock_owner_id(fc, id); req->in.h.opcode = FUSE_FLUSH; req->in.h.nodeid = get_node_id(inode); req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - __set_bit(FR_FORCE, &req->flags); - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); - if (err == -ENOSYS) { - fc->no_flush = 1; + 
req->in.args[0].size = sizeof(struct fuse_flush_in); + req->in.args[0].value = inarg; + if (fc->async_flush) { + req->end = fuse_flush_end; + __set_bit(FR_BACKGROUND, &req->flags); + fuse_request_send_background(fc, req); err = 0; + } else { + __set_bit(FR_FORCE, &req->flags); + fuse_request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + if (err == -ENOSYS) { + fc->no_flush = 1; + err = 0; + } } return err; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index d98d8cc..f212cdd 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -350,6 +350,7 @@ struct fuse_req { struct fuse_req *next; } write; struct fuse_notify_retrieve_in retrieve_in; + struct fuse_flush_in flush_in; } misc; /** page vector */ @@ -624,6 +625,9 @@ struct fuse_conn { /** Is lseek not implemented by fs? */ unsigned no_lseek:1; + /** Does the filesystem want async flush? */ + unsigned async_flush:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4e05b51..2d031b1 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -910,6 +910,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->writeback_cache = 1; if (arg->flags & FUSE_PARALLEL_DIROPS) fc->parallel_dirops = 1; + if (arg->flags & FUSE_ASYNC_FLUSH) + fc->async_flush = 1; if (arg->time_gran && arg->time_gran <= 1000000000) fc->sb->s_time_gran = arg->time_gran; } else { @@ -941,7 +943,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | - FUSE_PARALLEL_DIROPS; + FUSE_PARALLEL_DIROPS | FUSE_ASYNC_FLUSH; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 27e1736..76087d3 100644 --- a/include/uapi/linux/fuse.h 
+++ b/include/uapi/linux/fuse.h @@ -108,6 +108,9 @@ * * 7.25 * - add FUSE_PARALLEL_DIROPS + * + * 7.26 + * - add FUSE_ASYNC_FLUSH */ #ifndef _LINUX_FUSE_H @@ -143,7 +146,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 25 +#define FUSE_KERNEL_MINOR_VERSION 26 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -238,6 +241,7 @@ struct fuse_file_lock { * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir + * FUSE_ASYNC_FLUSH: asynchronous flush and release */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -258,6 +262,7 @@ struct fuse_file_lock { #define FUSE_WRITEBACK_CACHE (1 << 16) #define FUSE_NO_OPEN_SUPPORT (1 << 17) #define FUSE_PARALLEL_DIROPS (1 << 18) +#define FUSE_ASYNC_FLUSH (1 << 19) /** * CUSE INIT request/reply flags -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html