For recvmsg/sendmsg, if they don't complete inline, we currently need to allocate a struct io_async_msghdr for each request. This is a somewhat large struct. Hook up sendmsg/recvmsg to use the io_alloc_cache. This reduces the alloc + free overhead considerably, yielding 4-5% of extra performance running netbench. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- include/linux/io_uring_types.h | 6 ++- io_uring/io_uring.c | 3 ++ io_uring/net.c | 73 +++++++++++++++++++++++++++++----- io_uring/net.h | 11 ++++- 4 files changed, 81 insertions(+), 12 deletions(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index bf8f95332eda..d54b8b7e0746 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -222,8 +222,7 @@ struct io_ring_ctx { struct io_hash_table cancel_table_locked; struct list_head cq_overflow_list; struct io_alloc_cache apoll_cache; - struct xarray personalities; - u32 pers_next; + struct io_alloc_cache netmsg_cache; } ____cacheline_aligned_in_smp; /* IRQ completion list, under ->completion_lock */ @@ -241,6 +240,9 @@ struct io_ring_ctx { unsigned int file_alloc_start; unsigned int file_alloc_end; + struct xarray personalities; + u32 pers_next; + struct { /* * We cache a range of free CQEs we can use, once exhausted it diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index b5098773d924..32110c5b4059 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -89,6 +89,7 @@ #include "kbuf.h" #include "rsrc.h" #include "cancel.h" +#include "net.h" #include "timeout.h" #include "poll.h" @@ -297,6 +298,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_LIST_HEAD(&ctx->cq_overflow_list); INIT_LIST_HEAD(&ctx->io_buffers_cache); io_alloc_cache_init(&ctx->apoll_cache); + io_alloc_cache_init(&ctx->netmsg_cache); init_completion(&ctx->ref_comp); xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1); mutex_init(&ctx->uring_lock); @@ -2473,6 +2475,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) __io_cqring_overflow_flush(ctx, true); io_eventfd_unregister(ctx); io_flush_apoll_cache(ctx); + io_flush_netmsg_cache(ctx); mutex_unlock(&ctx->uring_lock); io_destroy_buffers(ctx); if (ctx->sq_creds) diff --git a/io_uring/net.c b/io_uring/net.c index 6679069eeef1..ba7e94ff287c 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -12,6 +12,7 @@ #include "io_uring.h" #include "kbuf.h" +#include "alloc_cache.h" #include "net.h" #if defined(CONFIG_NET) @@ -97,18 +98,57 @@ static bool io_net_retry(struct socket *sock, int flags) return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; } +static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_async_msghdr *hdr = req->async_data; + + if (!hdr || issue_flags & IO_URING_F_UNLOCKED) + return; + + if (io_alloc_cache_store(&req->ctx->netmsg_cache)) { + hlist_add_head(&hdr->cache_list, &req->ctx->netmsg_cache.list); + req->async_data = NULL; + req->flags &= ~REQ_F_ASYNC_DATA; + } +} + +static struct io_async_msghdr *io_recvmsg_alloc_async(struct io_kiocb *req, + unsigned int issue_flags) +{ + struct io_ring_ctx *ctx = req->ctx; + + if (!(issue_flags & IO_URING_F_UNLOCKED) && + !hlist_empty(&ctx->netmsg_cache.list)) { + struct io_async_msghdr *hdr; + + hdr = hlist_entry(ctx->netmsg_cache.list.first, + struct io_async_msghdr, cache_list); + ctx->netmsg_cache.nr_cached--; + hlist_del(&hdr->cache_list); + req->flags |= REQ_F_ASYNC_DATA; + req->async_data = hdr; + return hdr; + } + + if (!io_alloc_async_data(req)) + return req->async_data; + + return NULL; +} + static int io_setup_async_msg(struct io_kiocb *req, - struct io_async_msghdr *kmsg) + struct io_async_msghdr *kmsg, + unsigned int issue_flags) { struct io_async_msghdr *async_msg = req->async_data; if (async_msg) return -EAGAIN; - if (io_alloc_async_data(req)) { + async_msg = io_recvmsg_alloc_async(req, issue_flags); + if (!async_msg) { kfree(kmsg->free_iov); return -ENOMEM; } - async_msg = req->async_data; req->flags |= REQ_F_NEED_CLEANUP; memcpy(async_msg, kmsg, sizeof(*kmsg)); async_msg->msg.msg_name = &async_msg->addr; @@ -195,7 +235,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) if (!(req->flags & REQ_F_POLLED) && (sr->flags & IORING_RECVSEND_POLL_FIRST)) - return io_setup_async_msg(req, kmsg); + return io_setup_async_msg(req, kmsg, issue_flags); flags = sr->msg_flags; if (issue_flags & IO_URING_F_NONBLOCK) @@ -207,13 +247,13 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) if (ret < min_ret) { if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) - return io_setup_async_msg(req, kmsg); + return io_setup_async_msg(req, kmsg, issue_flags); if (ret == -ERESTARTSYS) ret = -EINTR; if (ret > 0 && io_net_retry(sock, flags)) { sr->done_io += ret; req->flags |= REQ_F_PARTIAL_IO; - return io_setup_async_msg(req, kmsg); + return io_setup_async_msg(req, kmsg, issue_flags); } req_set_fail(req); } @@ -221,6 +261,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) if (kmsg->free_iov) kfree(kmsg->free_iov); req->flags &= ~REQ_F_NEED_CLEANUP; + io_netmsg_recycle(req, issue_flags); if (ret >= 0) ret += sr->done_io; else if (sr->done_io) @@ -495,7 +536,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) if (!(req->flags & REQ_F_POLLED) && (sr->flags & IORING_RECVSEND_POLL_FIRST)) - return io_setup_async_msg(req, kmsg); + return io_setup_async_msg(req, kmsg, issue_flags); if (io_do_buffer_select(req)) { void __user *buf; @@ -519,13 +560,13 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags); if (ret < min_ret) { if (ret == -EAGAIN && force_nonblock) - return io_setup_async_msg(req, kmsg); + return io_setup_async_msg(req, kmsg, issue_flags); if (ret == -ERESTARTSYS) ret = -EINTR; if (ret > 0 && io_net_retry(sock, flags)) { sr->done_io += ret; req->flags |= REQ_F_PARTIAL_IO; - return io_setup_async_msg(req, kmsg); + return io_setup_async_msg(req, kmsg, issue_flags); } req_set_fail(req); } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { @@ -535,6 +576,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) /* fast path, check for non-NULL to avoid function call */ if (kmsg->free_iov) kfree(kmsg->free_iov); + io_netmsg_recycle(req, issue_flags); req->flags &= ~REQ_F_NEED_CLEANUP; if (ret > 0) ret += sr->done_io; @@ -848,4 +890,17 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) io_req_set_res(req, ret, 0); return IOU_OK; } + +void io_flush_netmsg_cache(struct io_ring_ctx *ctx) +{ + while (!hlist_empty(&ctx->netmsg_cache.list)) { + struct io_async_msghdr *hdr; + + hdr = hlist_entry(ctx->netmsg_cache.list.first, + struct io_async_msghdr, cache_list); + hlist_del(&hdr->cache_list); + kfree(hdr); + } + ctx->netmsg_cache.nr_cached = 0; +} #endif diff --git a/io_uring/net.h b/io_uring/net.h index 81d71d164770..576efb602c7f 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -5,7 +5,10 @@ #if defined(CONFIG_NET) struct io_async_msghdr { - struct iovec fast_iov[UIO_FASTIOV]; + union { + struct iovec fast_iov[UIO_FASTIOV]; + struct hlist_node cache_list; + }; /* points to an allocated iov, if NULL we use fast_iov instead */ struct iovec *free_iov; struct sockaddr __user *uaddr; @@ -40,4 +43,10 @@ int io_socket(struct io_kiocb *req, unsigned int issue_flags); int io_connect_prep_async(struct io_kiocb *req); int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_connect(struct io_kiocb *req, unsigned int issue_flags); + +void io_flush_netmsg_cache(struct io_ring_ctx *ctx); +#else +static inline void io_flush_netmsg_cache(struct io_ring_ctx *ctx) +{ +} #endif -- 2.35.1