kmalloc'ing struct io_notif is too expensive when done frequently, cache them as many other resources in io_uring. Keep two list, the first one is from where we're getting notifiers, it's protected by ->uring_lock. The second is protected by ->completion_lock, to which we queue released notifiers. Then we splice one list into another when needed. Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx> --- fs/io_uring.c | 68 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7d058deb5f73..422ff835bf36 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -381,6 +381,8 @@ struct io_notif { u64 tag; /* see struct io_notif_slot::seq */ u32 seq; + /* hook into ctx->notif_list and ctx->notif_list_locked */ + struct list_head cache_node; union { struct callback_head task_work; @@ -469,6 +471,8 @@ struct io_ring_ctx { struct xarray io_bl_xa; struct list_head io_buffers_cache; + /* struct io_notif cache protected by uring_lock */ + struct list_head notif_list; struct list_head timeout_list; struct list_head ltimeout_list; struct list_head cq_overflow_list; @@ -481,6 +485,9 @@ struct io_ring_ctx { /* IRQ completion list, under ->completion_lock */ struct io_wq_work_list locked_free_list; unsigned int locked_free_nr; + /* struct io_notif cache protected by completion_lock */ + struct list_head notif_list_locked; + unsigned int notif_locked_nr; const struct cred *sq_creds; /* cred used for __io_sq_thread() */ struct io_sq_data *sq_data; /* if using sq thread polling */ @@ -1932,6 +1939,8 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_WQ_LIST(&ctx->locked_free_list); INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); INIT_WQ_LIST(&ctx->submit_state.compl_reqs); + INIT_LIST_HEAD(&ctx->notif_list); + INIT_LIST_HEAD(&ctx->notif_list_locked); return ctx; err: kfree(ctx->dummy_ubuf); @@ -2795,12 +2804,15 @@ static void __io_notif_complete_tw(struct callback_head *cb) spin_lock(&ctx->completion_lock); io_fill_cqe_aux(ctx, notif->tag, 0, notif->seq); + + list_add(¬if->cache_node, &ctx->notif_list_locked); + ctx->notif_locked_nr++; + io_commit_cqring(ctx); spin_unlock(&ctx->completion_lock); io_cqring_ev_posted(ctx); percpu_ref_put(&ctx->refs); - kfree(notif); } static inline void io_notif_complete(struct io_notif *notif) @@ -2827,21 +2839,62 @@ static void io_uring_tx_zerocopy_callback(struct sk_buff *skb, queue_work(system_unbound_wq, ¬if->commit_work); } +static void io_notif_splice_cached(struct io_ring_ctx *ctx) + __must_hold(&ctx->uring_lock) +{ + spin_lock(&ctx->completion_lock); + list_splice_init(&ctx->notif_list_locked, &ctx->notif_list); + ctx->notif_locked_nr = 0; + spin_unlock(&ctx->completion_lock); +} + +static void io_notif_cache_purge(struct io_ring_ctx *ctx) + __must_hold(&ctx->uring_lock) +{ + io_notif_splice_cached(ctx); + + while (!list_empty(&ctx->notif_list)) { + struct io_notif *notif = list_first_entry(&ctx->notif_list, + struct io_notif, cache_node); + + list_del(¬if->cache_node); + kfree(notif); + } +} + +static inline bool io_notif_has_cached(struct io_ring_ctx *ctx) + __must_hold(&ctx->uring_lock) +{ + if (likely(!list_empty(&ctx->notif_list))) + return true; + if (data_race(READ_ONCE(ctx->notif_locked_nr) <= IO_COMPL_BATCH)) + return false; + io_notif_splice_cached(ctx); + return !list_empty(&ctx->notif_list); +} + static struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx, struct io_notif_slot *slot) __must_hold(&ctx->uring_lock) { struct io_notif *notif; - notif = kzalloc(sizeof(*notif), GFP_ATOMIC | __GFP_ACCOUNT); - if (!notif) - return NULL; + if (likely(io_notif_has_cached(ctx))) { + notif = list_first_entry(&ctx->notif_list, + struct io_notif, cache_node); + list_del(¬if->cache_node); + } else { + notif = kzalloc(sizeof(*notif), GFP_ATOMIC | __GFP_ACCOUNT); + if (!notif) + return NULL; + /* pre-initialise some fields */ + notif->ctx = ctx; + notif->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; + notif->uarg.callback = io_uring_tx_zerocopy_callback; + } notif->seq = slot->seq++; notif->tag = slot->tag; - notif->ctx = ctx; - notif->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; - notif->uarg.callback = io_uring_tx_zerocopy_callback; /* master ref owned by io_notif_slot, will be dropped on flush */ refcount_set(¬if->uarg.refcnt, 1); percpu_ref_get(&ctx->refs); @@ -11330,6 +11383,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots); + io_notif_cache_purge(ctx); io_mem_free(ctx->rings); io_mem_free(ctx->sq_sqes); -- 2.36.1