This is another performance optimization - async requests are better
served on another core. Async blocking requests are marked as such and
treated as sync requests.

Example with mmap read
fio --size=1G --numjobs=32 --ioengine=mmap --output-format=normal,terse \
    --directory=/scratch/dest/ --rw=read --bs=4K --group_reporting \
    job-file.fio

jobs   /dev/fuse     uring       gain     uring       gain     gain
                   (core+1)   (to dev)            (to dev)  (uring same-core)
   1     124.61     306.59      2.46     255.51     2.05     0.83
   2     248.83     580.00      2.33     563.00     2.26     0.97
   4     611.47    1049.65      1.72     998.57     1.63     0.95
   8    1499.95    1848.42      1.23    1990.64     1.33     1.08
  16    2206.30    2890.24      1.31    3439.13     1.56     1.19
  24    2545.68    2704.87      1.06    4527.63     1.78     1.67
  32    2233.52    2574.37      1.15    5263.09     2.36     2.04

Interesting here is that the max gain comes with more core usage,
I had actually expected the other way around.

Signed-off-by: Bernd Schubert <bschubert@xxxxxxx>
---
 fs/fuse/dev_uring.c | 5 ++++-
 fs/fuse/file.c      | 1 +
 fs/fuse/fuse_i.h    | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index fe80e66150c3..dff210658172 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -1106,6 +1106,8 @@ int fuse_uring_queue_fuse_req(struct fuse_conn *fc, struct fuse_req *req)
 	struct list_head *req_queue, *ent_queue;
 
 	if (ring->per_core_queue) {
+		int cpu_off;
+
 		/*
 		 * async requests are best handled on another core, the current
 		 * core can do application/page handling, while the async request
@@ -1118,7 +1120,8 @@
 		 * It should also not persistently switch between cores - makes
 		 * it hard for the scheduler.
 		 */
-		qid = task_cpu(current);
+		cpu_off = async ? 1 : 0;
+		qid = (task_cpu(current) + cpu_off) % ring->nr_queues;
 
 		if (unlikely(qid >= ring->nr_queues)) {
 			WARN_ONCE(1,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b57ce4157640..6fda1e7bd7f4 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -791,6 +791,7 @@ static ssize_t fuse_async_req_send(struct fuse_mount *fm,
 
 	ia->ap.args.end = fuse_aio_complete_req;
 	ia->ap.args.may_block = io->should_dirty;
+	ia->ap.args.async_blocking = io->blocking;
 	err = fuse_simple_background(fm, &ia->ap.args, GFP_KERNEL);
 	if (err)
 		fuse_aio_complete_req(fm, &ia->ap.args, err);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index fadc51a22bb9..7dcf0472df67 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -309,6 +309,7 @@ struct fuse_args {
 	bool may_block:1;
 	bool is_ext:1;
 	bool is_pinned:1;
+	bool async_blocking : 1;
 	struct fuse_in_arg in_args[3];
 	struct fuse_arg out_args[2];
 	void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error);
-- 
2.40.1