Batch putting down rsrc_node refs. Since struct req_batch is now used
even more extensively, this can shed some extra cycles.

Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---

Pretty much a (working) RFC. I couldn't find any difference with my
setup, but it might help in case you're striving to close the
remaining 5%.

 fs/io_uring.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0fea1342aeef..b7b0d76453ca 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1930,6 +1930,8 @@ static inline void io_req_complete_post(struct io_kiocb *req, long res,
 		io_dismantle_req(req);
 		io_put_task(req->task, 1);
+		if (req->fixed_rsrc_refs)
+			percpu_ref_put(req->fixed_rsrc_refs);
 		list_add(&req->compl.list, &cs->locked_free_list);
 		cs->locked_free_nr++;
 	} else
@@ -2044,8 +2046,6 @@ static void io_dismantle_req(struct io_kiocb *req)
 		kfree(req->async_data);
 	if (req->file)
 		io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
-	if (req->fixed_rsrc_refs)
-		percpu_ref_put(req->fixed_rsrc_refs);
 	io_req_clean_work(req);
 }
@@ -2065,6 +2065,8 @@ static void __io_free_req(struct io_kiocb *req)
 	io_dismantle_req(req);
 	io_put_task(req->task, 1);
+	if (req->fixed_rsrc_refs)
+		percpu_ref_put(req->fixed_rsrc_refs);
 
 	kmem_cache_free(req_cachep, req);
 	percpu_ref_put(&ctx->refs);
@@ -2381,6 +2383,9 @@ struct req_batch {
 	struct task_struct	*task;
 	int			task_refs;
 	int			ctx_refs;
+
+	struct percpu_ref	*rsrc_refs;
+	unsigned int		rsrc_refs_nr;
 };
 
 static inline void io_init_req_batch(struct req_batch *rb)
@@ -2388,6 +2393,15 @@ static inline void io_init_req_batch(struct req_batch *rb)
 	rb->task_refs = 0;
 	rb->ctx_refs = 0;
 	rb->task = NULL;
+	rb->rsrc_refs = NULL;
+	rb->rsrc_refs_nr = 0;
+}
+
+static inline void __io_req_batch_flush_rsrc_refs(struct req_batch *rb)
+{
+	/* can get positive ->rsrc_refs_nr with NULL ->rsrc_refs */
+	if (rb->rsrc_refs)
+		percpu_ref_put_many(rb->rsrc_refs, rb->rsrc_refs_nr);
 }
 
 static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
@@ -2397,6 +2411,8 @@ static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
 		io_put_task(rb->task, rb->task_refs);
 	if (rb->ctx_refs)
 		percpu_ref_put_many(&ctx->refs, rb->ctx_refs);
+
+	__io_req_batch_flush_rsrc_refs(rb);
 }
 
 static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req,
@@ -2413,6 +2429,14 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req,
 	rb->task_refs++;
 	rb->ctx_refs++;
+	if (req->fixed_rsrc_refs != rb->rsrc_refs) {
+		__io_req_batch_flush_rsrc_refs(rb);
+		rb->rsrc_refs = req->fixed_rsrc_refs;
+		rb->rsrc_refs_nr = 0;
+	}
+	/* it's ok to increment for NULL rsrc_refs, we'll handle it */
+	rb->rsrc_refs_nr++;
+
 	io_dismantle_req(req);
 	if (state->free_reqs != ARRAY_SIZE(state->reqs))
 		state->reqs[state->free_reqs++] = req;
--
2.24.0
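
As a rough illustration of the idea (a standalone userspace sketch with
made-up names such as struct ref, batch_put() and batch_flush(), not the
io_uring code itself): per-request puts against the same ref object are
accumulated and turned into a single put_many() when the batch is flushed
or when the ref object changes.

#include <stdio.h>

/* Stand-in for a percpu_ref: a plain counter (hypothetical, not kernel code). */
struct ref {
	long count;
};

static void ref_put_many(struct ref *r, long nr)
{
	r->count -= nr;
}

/* Mirrors the rsrc_refs/rsrc_refs_nr pair the patch adds to struct req_batch. */
struct ref_batch {
	struct ref *refs;	/* ref currently being batched, may be NULL */
	long nr;		/* puts accumulated against ->refs */
};

static void batch_flush(struct ref_batch *rb)
{
	/* nr can be positive while refs is NULL; only flush a real ref */
	if (rb->refs)
		ref_put_many(rb->refs, rb->nr);
}

static void batch_put(struct ref_batch *rb, struct ref *r)
{
	/* different ref object: flush what was accumulated for the old one */
	if (r != rb->refs) {
		batch_flush(rb);
		rb->refs = r;
		rb->nr = 0;
	}
	rb->nr++;
}

int main(void)
{
	struct ref a = { .count = 1000 }, b = { .count = 1000 };
	struct ref_batch rb = { .refs = NULL, .nr = 0 };

	for (int i = 0; i < 10; i++)
		batch_put(&rb, &a);	/* ten puts become one subtraction of 10 */
	batch_put(&rb, &b);		/* ref changed: flushes a, starts batching b */
	batch_flush(&rb);		/* final flush, like io_req_free_batch_finish() */

	/* prints a=990 b=999 */
	printf("a=%ld b=%ld\n", a.count, b.count);
	return 0;
}

The same flush-on-change rule is what io_req_free_batch() applies to
req->fixed_rsrc_refs above, with io_req_free_batch_finish() doing the
final flush for whatever is left in the batch.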