tctx_task_work() tries to fetch the next batch of requests, but first
flushes completions from the previous batch, which may be sub-optimal.
E.g. io_req_task_queue() executes the head of a link, where all the
linked requests may be enqueued through the same io_req_task_queue(),
and there are more cases like that. Do the flushing at the very end
instead, so completions from several waves of a single tctx_task_work()
run can be cached and flushed together.

Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---
 fs/io_uring.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d8bc4f82efd1..f31f00c6e829 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1890,13 +1890,13 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx)
 
 static void tctx_task_work(struct callback_head *cb)
 {
+	struct io_ring_ctx *ctx = NULL;
 	struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
 						  task_work);
 
 	clear_bit(0, &tctx->task_state);
 
 	while (!wq_list_empty(&tctx->task_list)) {
-		struct io_ring_ctx *ctx = NULL;
 		struct io_wq_work_list list;
 		struct io_wq_work_node *node;
 
@@ -1920,11 +1920,12 @@ static void tctx_task_work(struct callback_head *cb)
 			node = next;
 		}
 
-		ctx_flush_and_put(ctx);
 		if (!list.first)
 			break;
 		cond_resched();
	}
+
+	ctx_flush_and_put(ctx);
 }
 
 static int io_req_task_work_add(struct io_kiocb *req)
-- 
2.31.1
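
For readers less familiar with the io_uring internals, below is a
minimal userspace sketch of the batching idea, for illustration only:
process_wave(), flush_completions() and the completion_cache type are
hypothetical stand-ins, not kernel or io_uring APIs. Each wave of task
work caches completions, and one flush at the end of the run replaces
the per-wave flush that the patch removes.

/*
 * Illustration only: a minimal userspace sketch of the batching idea,
 * not io_uring code. process_wave(), flush_completions() and the
 * completion_cache type are hypothetical stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

struct completion_cache {
	int nr_cached;	/* completions accumulated, not yet flushed */
};

/* Each wave queues a couple of completions; stop after the last wave. */
static bool process_wave(struct completion_cache *cache, int *waves_left)
{
	if (*waves_left <= 0)
		return false;
	(*waves_left)--;
	cache->nr_cached += 2;	/* e.g. a link head plus a linked request */
	return true;
}

static void flush_completions(struct completion_cache *cache)
{
	printf("flushed %d completions in one batch\n", cache->nr_cached);
	cache->nr_cached = 0;
}

int main(void)
{
	struct completion_cache cache = { .nr_cached = 0 };
	int waves_left = 3;

	while (process_wave(&cache, &waves_left)) {
		/*
		 * Old behaviour: flush_completions(&cache) ran here, once
		 * per wave, flushing two completions at a time.
		 */
	}

	/* New behaviour: one flush covers all three waves at once. */
	flush_completions(&cache);
	return 0;
}

With three waves, the single flush at the end covers six cached
completions instead of three separate flushes of two, which is the
effect the patch aims for in tctx_task_work().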