Employ inline completion logic for read/write completions done via
io_req_task_complete(). If ->uring_lock is contended, just do normal
request completion, but if not, make tctx_task_work() grab the lock and
do batched inline completions in io_req_task_complete().

Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---
 fs/io_uring.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 54c4d8326944..7179e34df8e9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2061,6 +2061,8 @@ static void tctx_task_work(struct callback_head *cb)
 			if (req->ctx != ctx) {
 				ctx_flush_and_put(ctx, &locked);
 				ctx = req->ctx;
+				/* if not contended, grab and improve batching */
+				locked = mutex_trylock(&ctx->uring_lock);
 			}
 			req->io_task_work.func(req, &locked);
 			node = next;
@@ -2572,7 +2574,20 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
 
 static void io_req_task_complete(struct io_kiocb *req, bool *locked)
 {
-	__io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));
+	unsigned int cflags = io_put_rw_kbuf(req);
+	long res = req->result;
+
+	if (*locked) {
+		struct io_ring_ctx *ctx = req->ctx;
+		struct io_submit_state *state = &ctx->submit_state;
+
+		io_req_complete_state(req, res, cflags);
+		state->compl_reqs[state->compl_nr++] = req;
+		if (state->compl_nr == ARRAY_SIZE(state->compl_reqs))
+			io_submit_flush_completions(ctx);
+	} else {
+		io_req_complete_post(req, res, cflags);
+	}
 }
 
 static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
-- 
2.32.0
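
To illustrate the pattern outside the kernel tree, below is a minimal
userspace sketch of the same opportunistic-batching idea: if the lock can
be taken without contention, queue the completion into a batch that is
flushed in one go when full; if the lock is contended, do not wait and
complete immediately. This is not the patch's code. In the patch the
trylock happens once per ctx switch in tctx_task_work() and the lock is
dropped later via ctx_flush_and_put(), whereas this sketch takes and
drops the lock around each completion for brevity. All names here
(struct req, complete_request(), complete_now(), flush_batch(),
BATCH_MAX) are made up for illustration.

#include <pthread.h>
#include <stdio.h>

#define BATCH_MAX 32

struct req {
	int id;
	long result;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct req *batch[BATCH_MAX];
static unsigned int batch_nr;

/* Flush every batched request in one go; caller must hold @lock. */
static void flush_batch(void)
{
	for (unsigned int i = 0; i < batch_nr; i++)
		printf("batched completion: req %d, res %ld\n",
		       batch[i]->id, batch[i]->result);
	batch_nr = 0;
}

/* Contended fallback: complete a single request immediately. */
static void complete_now(struct req *r)
{
	printf("immediate completion: req %d, res %ld\n", r->id, r->result);
}

static void complete_request(struct req *r)
{
	/* Lock uncontended? Take it and queue for a cheaper batched flush. */
	if (pthread_mutex_trylock(&lock) == 0) {
		batch[batch_nr++] = r;
		if (batch_nr == BATCH_MAX)
			flush_batch();
		pthread_mutex_unlock(&lock);
	} else {
		/* Contended: don't block, complete right away instead. */
		complete_now(r);
	}
}

int main(void)
{
	struct req a = { .id = 1, .result = 4096 };
	struct req b = { .id = 2, .result = 512 };

	complete_request(&a);
	complete_request(&b);

	/* Drain anything still sitting in the batch before exit. */
	pthread_mutex_lock(&lock);
	flush_batch();
	pthread_mutex_unlock(&lock);
	return 0;
}

Build with "cc -pthread". The point of the design is the same as in the
patch: batching completions behind an already-uncontended lock is cheap,
while falling back to per-request completion avoids ever blocking on a
contended lock.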