Currently, we take fc->lock there only to check for fc->connected. But this flag is changed only on connection abort, which is very rare operation. Good thing looks to make fuse_request_send_background() faster, while fuse_abort_conn() slowler. So, we make fuse_request_send_background() lockless and mark (fc->connected == 1) region as RCU-protected. Abort function just uses synchronize_sched() to wait till all pending background requests is being queued, and then makes ordinary abort. Note, that synchronize_sched() is used instead of synchronize_rcu(), since we want to check for fc->connected without rcu_dereference() in fuse_request_send_background() (i.e., not to add memory barriers to this hot path). Also, note fuse_conn::aborting field is introduced. It's aimed to synchronize two sequential aborts executing in parallel: we do not want the secondly called abort finishes before the first one is done, after it sees fc->connected has already became 0. Our storage test shows performance increase on parallel read and write with the patchset (7 test runs, 300 seconds of execution of each, aio): ./io_iops --read --write --iops -u 48g -s 4k -p 24 -n 24 --aio -q 128 -t 300 -f /mnt/vstorage/file Before (iops): 25721.58203 (worst) 26179.43359 26092.58594 25789.96484 26238.63477 (best) 25985.88867 25987.66406 After (iops): 26852.27539 27547.60742 26322.29688 27643.33398 (best) 26753.10547 26157.91016 (worst) 26694.63477 All *after* runs are better than any *before* run, except of one. The best *after* run is 5% better than the best *before* run. The worst *after* run is 1.7% better then the worst *before* run. Signed-off-by: Kirill Tkhai <ktkhai@xxxxxxxxxxxxx> --- fs/fuse/dev.c | 17 ++++++++++++----- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 1 + 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9690e7c79df7..064a65ca7283 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -575,8 +575,6 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) } /* - * Called under fc->lock - * * fc->connected must have been checked previously */ void fuse_request_send_background_nocheck(struct fuse_conn *fc, @@ -604,12 +602,12 @@ void fuse_request_send_background_nocheck(struct fuse_conn *fc, void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) { BUG_ON(!req->end); - spin_lock(&fc->lock); + rcu_read_lock_sched(); if (fc->connected) { fuse_request_send_background_nocheck(fc, req); - spin_unlock(&fc->lock); + rcu_read_unlock_sched(); } else { - spin_unlock(&fc->lock); + rcu_read_unlock_sched(); req->out.h.error = -ENOTCONN; req->end(fc, req); fuse_put_request(fc, req); @@ -2107,6 +2105,13 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort) LIST_HEAD(to_end); fc->connected = 0; + fc->aborting = true; + spin_unlock(&fc->lock); + + /* Propagate fc->connected */ + synchronize_sched(); + + spin_lock(&fc->lock); fc->aborted = is_abort; fuse_set_initialized(fc); list_for_each_entry(fud, &fc->devices, entry) { @@ -2145,12 +2150,14 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort) spin_unlock(&fiq->waitq.lock); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); end_polls(fc); + fc->aborting = false; wake_up_all(&fc->blocked_waitq); spin_unlock(&fc->lock); end_requests(fc, &to_end); } else { spin_unlock(&fc->lock); + wait_event(fc->blocked_waitq, !fc->aborting); } } EXPORT_SYMBOL_GPL(fuse_abort_conn); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index b2c6a1cd1e2c..b9a991b42c88 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -523,6 +523,9 @@ struct fuse_conn { abort and device release */ unsigned connected; + /** Connection is now aborting */ + bool aborting; + /** Connection aborted via sysfs */ bool aborted; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index a4285ec7c248..9612eb4bc609 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -624,6 +624,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns) fc->blocked = 0; fc->initialized = 0; fc->connected = 1; + fc->aborting = false; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); fc->pid_ns = get_pid_ns(task_active_pid_ns(current));