There are two problems: 1) we always allocate refnodes in advance and free them if they haven't been used. It's expensive, takes two allocations, where one of them is percpu. And it may be pretty common to not actually use them. 2) Current API with allocating a refnode and setting some of the fields is error prone; we don't ever want to have a file node running fixed buffer callback... Solve both with pre-init/get API. Pre-init just leaves the node for later if not used, and for get (i.e. io_rsrc_refnode_get()), you need to explicitly pass all arguments setting callbacks/etc., so it's more resilient. Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx> --- fs/io_uring.c | 58 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e4c92498a0af..6655246287f3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -443,6 +443,7 @@ struct io_ring_ctx { struct llist_head rsrc_put_llist; struct list_head rsrc_ref_list; spinlock_t rsrc_ref_lock; + struct fixed_rsrc_ref_node *rsrc_backup_node; struct io_restriction restrictions; @@ -7021,12 +7022,36 @@ static void io_sqe_rsrc_kill_node(struct io_ring_ctx *ctx, struct fixed_rsrc_dat percpu_ref_kill(&ref_node->refs); } +static int io_rsrc_refnode_prealloc(struct io_ring_ctx *ctx) +{ + if (ctx->rsrc_backup_node) + return 0; + ctx->rsrc_backup_node = alloc_fixed_rsrc_ref_node(ctx); + return ctx->rsrc_backup_node ? 
0 : -ENOMEM; +} + +static struct fixed_rsrc_ref_node * +io_rsrc_refnode_get(struct io_ring_ctx *ctx, + struct fixed_rsrc_data *rsrc_data, + void (*rsrc_put)(struct io_ring_ctx *ctx, + struct io_rsrc_put *prsrc)) +{ + struct fixed_rsrc_ref_node *node = ctx->rsrc_backup_node; + + WARN_ON_ONCE(!node); + + ctx->rsrc_backup_node = NULL; + node->rsrc_data = rsrc_data; + node->rsrc_put = rsrc_put; + return node; +} + static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data, struct io_ring_ctx *ctx, void (*rsrc_put)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)) { - struct fixed_rsrc_ref_node *backup_node; + struct fixed_rsrc_ref_node *node; int ret; if (data->quiesce) @@ -7034,13 +7059,9 @@ static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data, data->quiesce = true; do { - ret = -ENOMEM; - backup_node = alloc_fixed_rsrc_ref_node(ctx); - if (!backup_node) + ret = io_rsrc_refnode_prealloc(ctx); + if (ret) break; - backup_node->rsrc_data = data; - backup_node->rsrc_put = rsrc_put; - io_sqe_rsrc_kill_node(ctx, data); percpu_ref_kill(&data->refs); flush_delayed_work(&ctx->rsrc_put_work); @@ -7050,17 +7071,16 @@ static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data, break; percpu_ref_resurrect(&data->refs); - io_sqe_rsrc_set_node(ctx, data, backup_node); - backup_node = NULL; + node = io_rsrc_refnode_get(ctx, data, rsrc_put); + io_sqe_rsrc_set_node(ctx, data, node); reinit_completion(&data->done); + mutex_unlock(&ctx->uring_lock); ret = io_run_task_work_sig(); mutex_lock(&ctx->uring_lock); } while (ret >= 0); data->quiesce = false; - if (backup_node) - destroy_fixed_rsrc_ref_node(backup_node); return ret; } @@ -7711,11 +7731,9 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, return -EOVERFLOW; if (done > ctx->nr_user_files) return -EINVAL; - - ref_node = alloc_fixed_rsrc_ref_node(ctx); - if (!ref_node) - return -ENOMEM; - init_fixed_file_ref_node(ctx, ref_node); + err = io_rsrc_refnode_prealloc(ctx); + if (err) + return err; fds = 
u64_to_user_ptr(up->data); for (done = 0; done < nr_args; done++) { @@ -7768,10 +7786,9 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, if (needs_switch) { percpu_ref_kill(&data->node->refs); + ref_node = io_rsrc_refnode_get(ctx, data, io_ring_file_put); io_sqe_rsrc_set_node(ctx, data, ref_node); - } else - destroy_fixed_rsrc_ref_node(ref_node); - + } return done ? done : err; } @@ -8447,6 +8464,9 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) io_eventfd_unregister(ctx); io_destroy_buffers(ctx); + if (ctx->rsrc_backup_node) + destroy_fixed_rsrc_ref_node(ctx->rsrc_backup_node); + #if defined(CONFIG_UNIX) if (ctx->ring_sock) { ctx->ring_sock->file = NULL; /* so that iput() is called */ -- 2.24.0