On 2/3/25 15:45, Keith Busch wrote:
> From: Keith Busch <kbusch@xxxxxxxxxx>
>
> Frequent alloc/free cycles on these are pretty costly. Use an io cache to
> more efficiently reuse these buffers.
>
> Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx>
> ---
>  include/linux/io_uring_types.h |  16 ++---
>  io_uring/filetable.c           |   2 +-
>  io_uring/rsrc.c                | 108 ++++++++++++++++++++++++---------
>  io_uring/rsrc.h                |   2 +-
>  4 files changed, 92 insertions(+), 36 deletions(-)
>
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index aa661ebfd6568..c0e0c1f92e5b1 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -67,8 +67,17 @@ struct io_file_table {
>  	unsigned int alloc_hint;
>  };
>
> +struct io_alloc_cache {
> +	void **entries;
> +	unsigned int nr_cached;
> +	unsigned int max_cached;
> +	size_t elem_size;
> +};
> +
>  struct io_buf_table {
>  	struct io_rsrc_data data;
> +	struct io_alloc_cache node_cache;
> +	struct io_alloc_cache imu_cache;
We can avoid all the churn if you kill patch 5/6 and put the caches
directly into struct io_ring_ctx. It's a bit better for future cache
improvements, and we can even reuse the node cache for files.
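
Roughly this (a sketch only, reusing your struct io_alloc_cache; exact
placement and field names don't matter):

	struct io_ring_ctx {
		...
		struct io_alloc_cache node_cache;
		struct io_alloc_cache imu_cache;
		...
	};

Then io_rsrc_node_alloc() could take nodes from ctx->node_cache
regardless of whether the node backs a file or a buffer.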
...
> diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
> index 864c2eabf8efd..5434b0d992d62 100644
> --- a/io_uring/rsrc.c
> +++ b/io_uring/rsrc.c
> @@ -117,23 +117,39 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
>  			unpin_user_page(imu->bvec[i].bv_page);
>  		if (imu->acct_pages)
>  			io_unaccount_mem(ctx, imu->acct_pages);
> -		kvfree(imu);
> +		if (struct_size(imu, bvec, imu->nr_bvecs) >
> +				ctx->buf_table.imu_cache.elem_size ||
It could be quite a large allocation; let's not cache it if it hasn't
come from the cache, for now. We can always improve on top.
And can we invert how it's calculated? See below. You'll have
fewer calculations in the fast path, and I don't really like
users looking at ->elem_size when it's not necessary.
#define IO_CACHED_BVEC_SEGS	N

io_alloc_cache_init(&table->imu_cache, ...,
		    /* elem_size */ struct_size(imu, bvec, IO_CACHED_BVEC_SEGS));

alloc(bvec_segs) {
	if (bvec_segs <= IO_CACHED_BVEC_SEGS)
		/* use the cache */
	...
}

free() {
	if (imu->nr_bvecs <= IO_CACHED_BVEC_SEGS)
		/* it came from the cache, return it there */
	else
		kvfree(imu);
}
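
IOW, something along these lines (untested sketch: io_alloc_imu() /
io_free_imu() are made-up helper names, it uses the existing
io_alloc_cache_get()/io_alloc_cache_put() helpers, and it assumes a
cache miss still allocates at the full cached size so the object can
be recycled on free):

	static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx,
						   int nr_bvecs)
	{
		struct io_mapped_ubuf *imu;

		if (nr_bvecs <= IO_CACHED_BVEC_SEGS) {
			imu = io_alloc_cache_get(&ctx->buf_table.imu_cache);
			if (imu)
				return imu;
			/* miss: allocate at elem_size so free() can recycle it */
			nr_bvecs = IO_CACHED_BVEC_SEGS;
		}
		return kvmalloc(struct_size(imu, bvec, nr_bvecs), GFP_KERNEL);
	}

	static void io_free_imu(struct io_ring_ctx *ctx,
				struct io_mapped_ubuf *imu)
	{
		/* cache-sized imus go back to the cache if it has room */
		if (imu->nr_bvecs <= IO_CACHED_BVEC_SEGS &&
		    io_alloc_cache_put(&ctx->buf_table.imu_cache, imu))
			return;
		kvfree(imu);
	}

No ->elem_size maths in the fast path, and anything too large for the
cache goes straight to kvfree().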
> +		    !io_alloc_cache_put(&ctx->buf_table.imu_cache, imu))
> +			kvfree(imu);
>  	}
>  }
--
Pavel Begunkov