Now that we have the submit_state in the ring itself, we can have io_kiocb allocations that are persistent across invocations. This reduces the time spent doing slab allocations and frees. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- fs/io_uring.c | 58 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ab3f842869dd..502bdef41460 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -263,8 +263,9 @@ struct io_sq_data { #define IO_COMPL_BATCH 32 struct io_comp_state { - unsigned int nr; struct io_kiocb *reqs[IO_COMPL_BATCH]; + unsigned int nr; + struct list_head free_list; }; struct io_submit_state { @@ -1290,7 +1291,6 @@ static inline bool io_is_timeout_noseq(struct io_kiocb *req) static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) { - struct io_submit_state *submit_state; struct io_ring_ctx *ctx; int hash_bits; @@ -1343,10 +1343,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work); init_llist_head(&ctx->rsrc_put_llist); - submit_state = &ctx->submit_state; - submit_state->comp.nr = 0; - submit_state->file_refs = 0; - submit_state->free_reqs = 0; + INIT_LIST_HEAD(&ctx->submit_state.comp.free_list); return ctx; err: if (ctx->fallback_req) @@ -1969,6 +1966,14 @@ static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx) { struct io_submit_state *state = &ctx->submit_state; + if (!list_empty(&state->comp.free_list)) { + struct io_kiocb *req; + + req = list_first_entry(&state->comp.free_list, struct io_kiocb, + compl.list); + list_del(&req->compl.list); + return req; + } if (!state->free_reqs) { gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; size_t sz; @@ -2255,33 +2260,25 @@ static void io_free_req(struct io_kiocb *req) } struct req_batch { - void *reqs[IO_IOPOLL_BATCH]; - int to_free; - struct task_struct *task; int task_refs; + int ctx_refs; }; static inline void io_init_req_batch(struct req_batch *rb) { - rb->to_free = 0; + rb->ctx_refs = 0; rb->task_refs = 0; rb->task = NULL; } -static void __io_req_free_batch_flush(struct io_ring_ctx *ctx, - struct req_batch *rb) -{ - kmem_cache_free_bulk(req_cachep, rb->to_free, rb->reqs); - percpu_ref_put_many(&ctx->refs, rb->to_free); - rb->to_free = 0; -} - static void io_req_free_batch_finish(struct io_ring_ctx *ctx, struct req_batch *rb) { - if (rb->to_free) - __io_req_free_batch_flush(ctx, rb); + if (rb->ctx_refs) { + percpu_ref_put_many(&ctx->refs, rb->ctx_refs); + rb->ctx_refs = 0; + } if (rb->task) { io_put_task(rb->task, rb->task_refs); rb->task = NULL; @@ -2290,6 +2287,8 @@ static void io_req_free_batch_finish(struct io_ring_ctx *ctx, static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req) { + struct io_comp_state *cs = &req->ctx->submit_state.comp; + if (unlikely(io_is_fallback_req(req))) { io_free_req(req); return; @@ -2305,9 +2304,8 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req) rb->task_refs++; io_dismantle_req(req); - rb->reqs[rb->to_free++] = req; - if (unlikely(rb->to_free == ARRAY_SIZE(rb->reqs))) - __io_req_free_batch_flush(req->ctx, rb); + rb->ctx_refs++; + list_add(&req->compl.list, &cs->free_list); } static void io_submit_flush_completions(struct io_comp_state *cs, @@ -8668,6 +8666,19 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx) idr_destroy(&ctx->io_buffer_idr); } +static void io_req_cache_free(struct io_ring_ctx *ctx) +{ + struct io_comp_state *cs = &ctx->submit_state.comp; + + while (!list_empty(&cs->free_list)) { + struct io_kiocb *req; + + req = list_first_entry(&cs->free_list, struct io_kiocb, compl.list); + list_del(&req->compl.list); + kmem_cache_free(req_cachep, req); + } +} + static void io_ring_ctx_free(struct io_ring_ctx *ctx) { io_finish_async(ctx); @@ -8705,6 +8716,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) put_cred(ctx->creds); kfree(ctx->cancel_hash); kmem_cache_free(req_cachep, ctx->fallback_req); + io_req_cache_free(ctx); kfree(ctx); } -- 2.30.0