Let the io_async_rw hold on to the iovec and reuse it, rather than always allocating and freeing it. While doing so, shrink io_async_rw by getting rid of the bigger embedded fast iovec array. Since iovecs are being recycled now, shrink the embedded array from 8 entries (UIO_FASTIOV) to 1. This reduces the io_async_rw size from 264 to 160 bytes, a 40% reduction. Includes KASAN tracking of the recycled iovec as well, if KASAN is enabled. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- io_uring/rw.c | 38 ++++++++++++++++++++++++++++++++------ io_uring/rw.h | 3 ++- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index f26e1dd5acaf..71ef417373c2 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -81,7 +81,9 @@ static int __io_import_iovec(int ddir, struct io_kiocb *req, { const struct io_issue_def *def = &io_issue_defs[req->opcode]; struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); + struct iovec *iov; void __user *buf; + int nr_segs, ret; size_t sqe_len; buf = u64_to_user_ptr(rw->addr); @@ -99,9 +101,23 @@ static int __io_import_iovec(int ddir, struct io_kiocb *req, return import_ubuf(ddir, buf, sqe_len, &io->iter); } - io->free_iovec = io->fast_iov; - return __import_iovec(ddir, buf, sqe_len, UIO_FASTIOV, &io->free_iovec, - &io->iter, req->ctx->compat); + if (io->free_iovec) { + nr_segs = io->free_iov_nr; + iov = io->free_iovec; + } else { + iov = &io->fast_iov; + nr_segs = 1; + } + ret = __import_iovec(ddir, buf, sqe_len, nr_segs, &iov, &io->iter, + req->ctx->compat); + if (unlikely(ret < 0)) + return ret; + if (iov) { + io->free_iov_nr = io->iter.nr_segs; + kfree(io->free_iovec); + io->free_iovec = iov; + } + return 0; } static inline int io_import_iovec(int rw, struct io_kiocb *req, @@ -122,6 +138,7 @@ static void io_rw_iovec_free(struct io_async_rw *rw) { if (rw->free_iovec) { kfree(rw->free_iovec); + rw->free_iov_nr = 0; rw->free_iovec = NULL; } } @@ -134,6 +151,8 @@ static void io_rw_recycle(struct io_kiocb *req, unsigned int issue_flags) io_rw_iovec_free(rw); return; } + if
(rw->free_iovec) + kasan_mempool_poison_object(rw->free_iovec); if (io_alloc_cache_put(&req->ctx->rw_cache, &rw->cache)) { req->async_data = NULL; req->flags &= ~REQ_F_ASYNC_DATA; @@ -155,15 +174,19 @@ static int io_rw_alloc_async(struct io_kiocb *req) entry = io_alloc_cache_get(&ctx->rw_cache); if (entry) { rw = container_of(entry, struct io_async_rw, cache); - req->flags |= REQ_F_ASYNC_DATA; + if (rw->free_iovec) + kasan_mempool_unpoison_object(rw->free_iovec, + rw->free_iov_nr * sizeof(struct iovec)); + req->flags |= REQ_F_ASYNC_DATA | REQ_F_NEED_CLEANUP; req->async_data = rw; goto done; } if (!io_alloc_async_data(req)) { rw = req->async_data; -done: rw->free_iovec = NULL; + rw->free_iov_nr = 0; +done: rw->bytes_done = 0; return 0; } @@ -1130,6 +1153,9 @@ void io_rw_cache_free(struct io_cache_entry *entry) struct io_async_rw *rw; rw = container_of(entry, struct io_async_rw, cache); - kfree(rw->free_iovec); + if (rw->free_iovec) + kasan_mempool_unpoison_object(rw->free_iovec, + rw->free_iov_nr * sizeof(struct iovec)); + io_rw_iovec_free(rw); kfree(rw); } diff --git a/io_uring/rw.h b/io_uring/rw.h index 56fb1703dc5a..26dfa12e2306 100644 --- a/io_uring/rw.h +++ b/io_uring/rw.h @@ -12,8 +12,9 @@ struct io_async_rw { }; struct iov_iter iter; struct iov_iter_state iter_state; - struct iovec fast_iov[UIO_FASTIOV]; + struct iovec fast_iov; struct iovec *free_iovec; + int free_iov_nr; struct wait_page_queue wpq; }; -- 2.43.0