From: Keith Busch <kbusch@xxxxxxxxxx> Frequent alloc/free cycles on these is pretty costly. Use an io cache to more efficiently reuse these buffers. Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx> --- include/linux/io_uring_types.h | 2 + io_uring/filetable.c | 2 +- io_uring/io_uring.c | 2 + io_uring/rsrc.c | 70 +++++++++++++++++++++++++++------- io_uring/rsrc.h | 4 +- 5 files changed, 64 insertions(+), 16 deletions(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index c0fe8a00fe53a..3ce87dcd99eec 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -294,6 +294,8 @@ struct io_ring_ctx { struct io_file_table file_table; struct io_rsrc_data buf_table; + struct io_alloc_cache node_cache; + struct io_alloc_cache imu_cache; struct io_submit_state submit_state; diff --git a/io_uring/filetable.c b/io_uring/filetable.c index dd8eeec97acf6..a21660e3145ab 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -68,7 +68,7 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, if (slot_index >= ctx->file_table.data.nr) return -EINVAL; - node = io_rsrc_node_alloc(IORING_RSRC_FILE); + node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); if (!node) return -ENOMEM; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 2f5dd47e7dbf5..8f542a5f20a60 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -292,6 +292,7 @@ static void io_free_alloc_caches(struct io_ring_ctx *ctx) io_alloc_cache_free(&ctx->uring_cache, kfree); io_alloc_cache_free(&ctx->msg_cache, kfree); io_futex_cache_free(ctx); + io_rsrc_cache_free(ctx); } static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) @@ -339,6 +340,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX, sizeof(struct io_kiocb), 0); ret |= io_futex_cache_init(ctx); + ret |= io_rsrc_cache_init(ctx); if (ret) goto free_ref; init_completion(&ctx->ref_comp); diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 0eceaf2e03777..450b4c039334d 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -33,6 +33,8 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, #define IORING_MAX_FIXED_FILES (1U << 20) #define IORING_MAX_REG_BUFFERS (1U << 14) +#define IO_CACHED_BVECS_SEGS 32 + int __io_account_mem(struct user_struct *user, unsigned long nr_pages) { unsigned long page_limit, cur_pages, new_pages; @@ -111,6 +113,22 @@ static void io_release_ubuf(void *priv) unpin_user_page(imu->bvec[i].bv_page); } +static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx, + int nr_bvecs) +{ + if (nr_bvecs <= IO_CACHED_BVECS_SEGS) + return io_cache_alloc(&ctx->imu_cache, GFP_KERNEL); + return kvmalloc(struct_size_t(struct io_mapped_ubuf, bvec, nr_bvecs), + GFP_KERNEL); +} + +static void io_free_imu(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu) +{ + if (imu->nr_bvecs > IO_CACHED_BVECS_SEGS || + !io_alloc_cache_put(&ctx->imu_cache, imu)) + kvfree(imu); +} + static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu) { if (!refcount_dec_and_test(&imu->refs)) @@ -119,22 +137,44 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu) if (imu->acct_pages) io_unaccount_mem(ctx, imu->acct_pages); imu->release(imu->priv); - kvfree(imu); } -struct io_rsrc_node *io_rsrc_node_alloc(int type) +struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type) { struct io_rsrc_node *node; - node = kzalloc(sizeof(*node), GFP_KERNEL); + node = io_cache_alloc(&ctx->node_cache, GFP_KERNEL); if (node) { node->type = type; node->refs = 1; + node->tag = 0; + node->file_ptr = 0; } return node; } -__cold void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data) +bool io_rsrc_cache_init(struct io_ring_ctx *ctx) +{ + const int imu_cache_size = struct_size_t(struct io_mapped_ubuf, bvec, + IO_CACHED_BVECS_SEGS); + const int node_size = sizeof(struct io_rsrc_node); + bool ret; + + ret = io_alloc_cache_init(&ctx->node_cache, IO_ALLOC_CACHE_MAX, + node_size, 0); + ret |= io_alloc_cache_init(&ctx->imu_cache, IO_ALLOC_CACHE_MAX, + imu_cache_size, 0); + return ret; +} + +void io_rsrc_cache_free(struct io_ring_ctx *ctx) +{ + io_alloc_cache_free(&ctx->node_cache, kfree); + io_alloc_cache_free(&ctx->imu_cache, kfree); +} + +__cold void io_rsrc_data_free(struct io_ring_ctx *ctx, + struct io_rsrc_data *data) { if (!data->nr) return; @@ -207,7 +247,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, err = -EBADF; break; } - node = io_rsrc_node_alloc(IORING_RSRC_FILE); + node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); if (!node) { err = -ENOMEM; fput(file); @@ -465,7 +505,8 @@ void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node) break; } - kfree(node); + if (!io_alloc_cache_put(&ctx->node_cache, node)) + kvfree(node); } int io_sqe_files_unregister(struct io_ring_ctx *ctx) @@ -527,7 +568,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, goto fail; } ret = -ENOMEM; - node = io_rsrc_node_alloc(IORING_RSRC_FILE); + node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); if (!node) { fput(file); goto fail; @@ -732,7 +773,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, if (!iov->iov_base) return NULL; - node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); + node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); if (!node) return ERR_PTR(-ENOMEM); node->buf = NULL; @@ -752,10 +793,11 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, coalesced = io_coalesce_buffer(&pages, &nr_pages, &data); } - imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL); + imu = io_alloc_imu(ctx, nr_pages); if (!imu) goto done; + imu->nr_bvecs = nr_pages; ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage); if (ret) { unpin_user_pages(pages, nr_pages); @@ -766,7 +808,6 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, /* store original address for later verification */ imu->ubuf = (unsigned long) iov->iov_base; imu->len = iov->iov_len; - imu->nr_bvecs = nr_pages; imu->folio_shift = PAGE_SHIFT; imu->release = io_release_ubuf; imu->priv = imu; @@ -789,7 +830,8 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, } done: if (ret) { - kvfree(imu); + if (imu) + io_free_imu(ctx, imu); if (node) io_put_rsrc_node(ctx, node); node = ERR_PTR(ret); @@ -893,14 +935,14 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, goto unlock; } - node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); + node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); if (!node) { ret = -ENOMEM; goto unlock; } nr_bvecs = blk_rq_nr_phys_segments(rq); - imu = kvmalloc(struct_size(imu, bvec, nr_bvecs), GFP_KERNEL); + imu = io_alloc_imu(ctx, nr_bvecs); if (!imu) { kfree(node); ret = -ENOMEM; @@ -1137,7 +1179,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx if (!src_node) { dst_node = NULL; } else { - dst_node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); + dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); if (!dst_node) { ret = -ENOMEM; goto out_free; diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 7600e2736eeb3..27e545694d01e 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -48,7 +48,9 @@ struct io_imu_folio_data { unsigned int nr_folios; }; -struct io_rsrc_node *io_rsrc_node_alloc(int type); +bool io_rsrc_cache_init(struct io_ring_ctx *ctx); +void io_rsrc_cache_free(struct io_ring_ctx *ctx); +struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type); void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node); void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data); int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr); -- 2.43.5