Currently every instance of struct io_tx_notifier holds a ctx reference, including ones sitting in caches. So, when we try to quiesce the ring (e.g. for register), we'd be waiting for refs that nobody can release. That's worked around for cancellation. Don't take ctx references, but wait for all notifiers to return to the caches when needed. An even better solution would be to wait for all rsrc refs. It's also nice to remove an extra pair of percpu_ref_get/put(). Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx> --- fs/io_uring.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5f79178a3f38..8cfa8ea161e4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -453,6 +453,7 @@ struct io_ring_ctx { struct io_mapped_ubuf *dummy_ubuf; struct io_rsrc_data *file_data; struct io_rsrc_data *buf_data; + int nr_tx_ctx; struct delayed_work rsrc_put_work; struct llist_head rsrc_put_llist; @@ -1982,7 +1983,6 @@ static void io_zc_tx_work_callback(struct work_struct *work) io_cqring_ev_posted(ctx); percpu_ref_put(rsrc_refs); - percpu_ref_put(&ctx->refs); } static void io_uring_tx_zerocopy_callback(struct sk_buff *skb, @@ -2028,6 +2028,7 @@ static void io_notifier_free_cached(struct io_ring_ctx *ctx) struct io_tx_notifier, cache_node); list_del(&notifier->cache_node); kfree(notifier); + ctx->nr_tx_ctx--; } } @@ -2060,6 +2061,7 @@ static struct io_tx_notifier *io_alloc_tx_notifier(struct io_ring_ctx *ctx, notifier = kmalloc(sizeof(*notifier), gfp_flags); if (!notifier) return NULL; + ctx->nr_tx_ctx++; uarg = &notifier->uarg; uarg->ctx = ctx; uarg->flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; @@ -2072,7 +2074,6 @@ static struct io_tx_notifier *io_alloc_tx_notifier(struct io_ring_ctx *ctx, io_set_rsrc_node(&notifier->fixed_rsrc_refs, ctx); refcount_set(&notifier->uarg.refcnt, 1); - percpu_ref_get(&ctx->refs); return notifier; } @@ -9785,7 +9786,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) #endif 
WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); - io_notifier_free_cached(ctx); io_sqe_tx_ctx_unregister(ctx); io_mem_free(ctx->rings); io_mem_free(ctx->sq_sqes); @@ -9946,6 +9946,19 @@ static __cold void io_ring_exit_work(struct work_struct *work) spin_lock(&ctx->completion_lock); spin_unlock(&ctx->completion_lock); + while (1) { + int nr; + + mutex_lock(&ctx->uring_lock); + io_notifier_free_cached(ctx); + nr = ctx->nr_tx_ctx; + mutex_unlock(&ctx->uring_lock); + + if (!nr) + break; + schedule_timeout(interval); + } + io_ring_ctx_free(ctx); } -- 2.34.1