Employ inline completion logic for read/write completions done via
io_req_task_complete(). If ->uring_lock is contended, just do normal
request completion, but if not, make tctx_task_work() grab the lock and
do batched inline completions in io_req_task_complete().

Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---
 fs/io_uring.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 54c4d8326944..7179e34df8e9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2061,6 +2061,8 @@ static void tctx_task_work(struct callback_head *cb)
 			if (req->ctx != ctx) {
 				ctx_flush_and_put(ctx, &locked);
 				ctx = req->ctx;
+				/* if not contended, grab and improve batching */
+				locked = mutex_trylock(&ctx->uring_lock);
 			}
 			req->io_task_work.func(req, &locked);
 			node = next;
@@ -2572,7 +2574,20 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
 
 static void io_req_task_complete(struct io_kiocb *req, bool *locked)
 {
-	__io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));
+	unsigned int cflags = io_put_rw_kbuf(req);
+	long res = req->result;
+
+	if (*locked) {
+		struct io_ring_ctx *ctx = req->ctx;
+		struct io_submit_state *state = &ctx->submit_state;
+
+		io_req_complete_state(req, res, cflags);
+		state->compl_reqs[state->compl_nr++] = req;
+		if (state->compl_nr == ARRAY_SIZE(state->compl_reqs))
+			io_submit_flush_completions(ctx);
+	} else {
+		io_req_complete_post(req, res, cflags);
+	}
 }
 
 static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
-- 
2.32.0
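
To illustrate the pattern outside the kernel tree, below is a minimal
userspace sketch of the same opportunistic-batching idea: if the lock can
be taken without contention, queue the completion into a batch that is
flushed in one go when full; if the lock is contended, do not wait and
complete immediately. This is not the patch's code. In the patch the
trylock happens once per ctx switch in tctx_task_work() and the lock is
dropped later via ctx_flush_and_put(), whereas this sketch takes and
drops the lock around each completion for brevity. All names here
(struct req, complete_request(), complete_now(), flush_batch(),
BATCH_MAX) are made up for illustration.

#include <pthread.h>
#include <stdio.h>

#define BATCH_MAX 32

struct req {
	int id;
	long result;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct req *batch[BATCH_MAX];
static unsigned int batch_nr;

/* Flush every batched request in one go; caller must hold @lock. */
static void flush_batch(void)
{
	for (unsigned int i = 0; i < batch_nr; i++)
		printf("batched completion: req %d, res %ld\n",
		       batch[i]->id, batch[i]->result);
	batch_nr = 0;
}

/* Contended fallback: complete a single request immediately. */
static void complete_now(struct req *r)
{
	printf("immediate completion: req %d, res %ld\n", r->id, r->result);
}

static void complete_request(struct req *r)
{
	/* Lock uncontended? Take it and queue for a cheaper batched flush. */
	if (pthread_mutex_trylock(&lock) == 0) {
		batch[batch_nr++] = r;
		if (batch_nr == BATCH_MAX)
			flush_batch();
		pthread_mutex_unlock(&lock);
	} else {
		/* Contended: don't block, complete right away instead. */
		complete_now(r);
	}
}

int main(void)
{
	struct req a = { .id = 1, .result = 4096 };
	struct req b = { .id = 2, .result = 512 };

	complete_request(&a);
	complete_request(&b);

	/* Drain anything still sitting in the batch before exit. */
	pthread_mutex_lock(&lock);
	flush_batch();
	pthread_mutex_unlock(&lock);
	return 0;
}

Build with "cc -pthread". The point of the design is the same as in the
patch: batching completions behind an already-uncontended lock is cheap,
while falling back to per-request completion avoids ever blocking on a
contended lock.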