Save bulk allocated requests across io_uring_enter(), so that lower QD can
also benefit from it. This is not much of an optimisation, and for current
cache sizes it would probably only affect offloaded ~QD=1.

Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---
 fs/io_uring.c | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5022eb4cb9a4..82df6171baae 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -834,6 +834,25 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	return NULL;
 }
 
+static void io_init_submit_state(struct io_ring_ctx *ctx)
+{
+	struct io_submit_state *state = &ctx->submit_state;
+
+	state->mm = (ctx->flags & IORING_SETUP_SQPOLL) ? NULL : ctx->sqo_mm;
+
+	state->free_reqs = 0;
+	state->cur_req = 0;
+}
+
+static void io_clear_submit_state(struct io_ring_ctx *ctx)
+{
+	struct io_submit_state *state = &ctx->submit_state;
+
+	if (state->free_reqs)
+		kmem_cache_free_bulk(req_cachep, state->free_reqs,
+					&state->reqs[state->cur_req]);
+}
+
 static inline bool __req_need_defer(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
@@ -1132,10 +1151,9 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx)
 	struct io_submit_state *state = &ctx->submit_state;
 
 	if (!state->free_reqs) {
-		size_t sz;
+		size_t sz = ARRAY_SIZE(state->reqs);
 		int ret;
 
-		sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
 		ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
 
 		/*
@@ -4759,9 +4777,6 @@ static void io_submit_end(struct io_ring_ctx *ctx)
 	struct io_submit_state *state = &ctx->submit_state;
 
 	io_file_put(state);
-	if (state->free_reqs)
-		kmem_cache_free_bulk(req_cachep, state->free_reqs,
-					&state->reqs[state->cur_req]);
 	if (state->link)
 		io_queue_link_head(state->link);
 }
@@ -4774,7 +4789,6 @@ static void io_submit_start(struct io_ring_ctx *ctx, unsigned int max_ios,
 {
 	struct io_submit_state *state = &ctx->submit_state;
 
-	state->free_reqs = 0;
 	state->file = NULL;
 	state->ios_left = max_ios;
 
@@ -5765,12 +5779,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 	int ret;
 
 	init_waitqueue_head(&ctx->sqo_wait);
-	mmgrab(current->mm);
-	ctx->sqo_mm = current->mm;
-
-	ctx->submit_state.mm = NULL;
-	if (!(ctx->flags & IORING_SETUP_SQPOLL))
-		ctx->submit_state.mm = ctx->sqo_mm;
 
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		ret = -EPERM;
@@ -6146,6 +6154,8 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	if (ctx->sqo_mm)
 		mmdrop(ctx->sqo_mm);
 
+	io_clear_submit_state(ctx);
+
 	io_iopoll_reap_events(ctx);
 	io_sqe_buffer_unregister(ctx);
 	io_sqe_files_unregister(ctx);
@@ -6584,6 +6594,10 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
 	if (ret)
 		goto err;
 
+	mmgrab(current->mm);
+	ctx->sqo_mm = current->mm;
+	io_init_submit_state(ctx);
+
 	ret = io_sq_offload_start(ctx, p);
 	if (ret)
 		goto err;
-- 
2.24.0
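
For reference, the effect of the change is that the leftovers from
kmem_cache_alloc_bulk() now stay in ctx->submit_state across
io_uring_enter() calls and are only returned to the slab in
io_clear_submit_state() at ring teardown, rather than being freed from
io_submit_end() after every submission batch. Below is a rough,
hypothetical userspace sketch of the same caching pattern, not part of the
patch: the names (struct req_cache, cache_get(), cache_clear()) are made up
for illustration, malloc()/free() stand in for the slab bulk API, and error
handling is omitted.

/*
 * Hypothetical userspace sketch of the caching pattern used by the patch:
 * a per-context array of preallocated requests, refilled in one batch when
 * empty (standing in for kmem_cache_alloc_bulk()) and only drained when the
 * context is torn down (standing in for kmem_cache_free_bulk()), instead of
 * after every submission batch.
 */
#include <stdio.h>
#include <stdlib.h>

#define CACHE_SIZE 32

struct req {
	int opcode;
};

struct req_cache {
	struct req *reqs[CACHE_SIZE];
	size_t free_reqs;	/* cached requests still available */
	size_t cur_req;		/* index of the next request to hand out */
};

static struct req *cache_get(struct req_cache *c)
{
	if (!c->free_reqs) {
		/* bulk refill: always grab a full array's worth */
		for (size_t i = 0; i < CACHE_SIZE; i++)
			c->reqs[i] = malloc(sizeof(struct req));
		c->free_reqs = CACHE_SIZE;
		c->cur_req = 0;
	}
	c->free_reqs--;
	return c->reqs[c->cur_req++];
}

static void cache_clear(struct req_cache *c)
{
	/* teardown: return whatever is still cached */
	while (c->free_reqs) {
		free(c->reqs[c->cur_req++]);
		c->free_reqs--;
	}
}

int main(void)
{
	struct req_cache cache = { .free_reqs = 0, .cur_req = 0 };

	/* two small "submission" batches share one bulk refill */
	for (int batch = 0; batch < 2; batch++) {
		struct req *r = cache_get(&cache);

		r->opcode = batch;
		printf("batch %d: %zu reqs left in cache\n", batch, cache.free_reqs);
		free(r);	/* a completed request goes back to the allocator */
	}
	cache_clear(&cache);
	return 0;
}

The point is only that dropping the free-on-every-batch path lets one bulk
refill survive across calls, which is what allows low queue depths to reuse
the batch.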