On Thu, 2022-07-07 at 17:23 -0600, Jens Axboe wrote:
> For recvmsg/sendmsg, if they don't complete inline, we currently need
> to allocate a struct io_async_msghdr for each request. This is a
> somewhat large struct.
> 
> Hook up sendmsg/recvmsg to use the io_alloc_cache. This reduces the
> alloc + free overhead considerably, yielding 4-5% of extra performance
> running netbench.
> 
> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
> ---
>  include/linux/io_uring_types.h |  6 ++-
>  io_uring/io_uring.c            |  3 ++
>  io_uring/net.c                 | 73 +++++++++++++++++++++++++++++-----
>  io_uring/net.h                 | 11 ++++-
>  4 files changed, 81 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index bf8f95332eda..d54b8b7e0746 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -222,8 +222,7 @@ struct io_ring_ctx {
>  		struct io_hash_table	cancel_table_locked;
>  		struct list_head	cq_overflow_list;
>  		struct io_alloc_cache	apoll_cache;
> -		struct xarray		personalities;
> -		u32			pers_next;
> +		struct io_alloc_cache	netmsg_cache;
>  	} ____cacheline_aligned_in_smp;
> 
>  	/* IRQ completion list, under ->completion_lock */
> @@ -241,6 +240,9 @@ struct io_ring_ctx {
>  	unsigned int		file_alloc_start;
>  	unsigned int		file_alloc_end;
> 
> +	struct xarray		personalities;
> +	u32			pers_next;
> +
>  	struct {
>  		/*
>  		 * We cache a range of free CQEs we can use, once exhausted it
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index b5098773d924..32110c5b4059 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -89,6 +89,7 @@
>  #include "kbuf.h"
>  #include "rsrc.h"
>  #include "cancel.h"
> +#include "net.h"
> 
>  #include "timeout.h"
>  #include "poll.h"
> @@ -297,6 +298,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
>  	INIT_LIST_HEAD(&ctx->cq_overflow_list);
>  	INIT_LIST_HEAD(&ctx->io_buffers_cache);
>  	io_alloc_cache_init(&ctx->apoll_cache);
> +	io_alloc_cache_init(&ctx->netmsg_cache);
>  	init_completion(&ctx->ref_comp);
>  	xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
>  	mutex_init(&ctx->uring_lock);
> @@ -2473,6 +2475,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
>  	__io_cqring_overflow_flush(ctx, true);
>  	io_eventfd_unregister(ctx);
>  	io_flush_apoll_cache(ctx);
> +	io_flush_netmsg_cache(ctx);
>  	mutex_unlock(&ctx->uring_lock);
>  	io_destroy_buffers(ctx);
>  	if (ctx->sq_creds)
> diff --git a/io_uring/net.c b/io_uring/net.c
> index 6679069eeef1..ba7e94ff287c 100644
> --- a/io_uring/net.c
> +++ b/io_uring/net.c
> @@ -12,6 +12,7 @@
> 
>  #include "io_uring.h"
>  #include "kbuf.h"
> +#include "alloc_cache.h"
>  #include "net.h"
> 
>  #if defined(CONFIG_NET)
> @@ -97,18 +98,57 @@ static bool io_net_retry(struct socket *sock, int flags)
>  	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
>  }
> 
> +static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
> +{
> +	struct io_async_msghdr *hdr = req->async_data;
> +
> +	if (!hdr || issue_flags & IO_URING_F_UNLOCKED)
> +		return;
> +
> +	if (io_alloc_cache_store(&req->ctx->netmsg_cache)) {
> +		hlist_add_head(&hdr->cache_list, &req->ctx->netmsg_cache.list);

Can io_alloc_cache_store() just do the store itself? It would be nicer
to have cache::list be generally unused outside of the cache code.
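Something like the below, perhaps. Completely untested sketch, and the
io_cache_entry/io_alloc_cache_put/io_alloc_cache_get names plus the
IO_ALLOC_CACHE_MAX limit are just my guesses at how it could look:

/* io_uring/alloc_cache.h, names are placeholders */
struct io_cache_entry {
	struct hlist_node	node;
};

/* Store an entry if there is room; return true if the cache took it */
static inline bool io_alloc_cache_put(struct io_alloc_cache *cache,
				      struct io_cache_entry *entry)
{
	if (cache->nr_cached < IO_ALLOC_CACHE_MAX) {
		cache->nr_cached++;
		hlist_add_head(&entry->node, &cache->list);
		return true;
	}
	return false;
}

/* Pop a cached entry, or return NULL if the cache is empty */
static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *cache)
{
	if (!hlist_empty(&cache->list)) {
		struct hlist_node *node = cache->list.first;

		cache->nr_cached--;
		hlist_del(node);
		return container_of(node, struct io_cache_entry, node);
	}
	return NULL;
}

io_async_msghdr could then embed a struct io_cache_entry in the union
instead of the bare hlist_node, and net.c would never have to poke at
cache->list or cache->nr_cached directly.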
> +		req->async_data = NULL;
> +		req->flags &= ~REQ_F_ASYNC_DATA;
> +	}
> +}
> +
> +static struct io_async_msghdr *io_recvmsg_alloc_async(struct io_kiocb *req,
> +						      unsigned int issue_flags)
> +{
> +	struct io_ring_ctx *ctx = req->ctx;
> +
> +	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
> +	    !hlist_empty(&ctx->netmsg_cache.list)) {
> +		struct io_async_msghdr *hdr;
> +
> +		hdr = hlist_entry(ctx->netmsg_cache.list.first,
> +				  struct io_async_msghdr, cache_list);
> +		ctx->netmsg_cache.nr_cached--;
> +		hlist_del(&hdr->cache_list);

Ditto here. I think all the hlist stuff and the nr_cached manipulation
can be wrapped up as well; the get side of the sketch above would cover
this.

> +		req->flags |= REQ_F_ASYNC_DATA;
> +		req->async_data = hdr;
> +		return hdr;
> +	}
> +
> +	if (!io_alloc_async_data(req))
> +		return req->async_data;
> +
> +	return NULL;
> +}
> +
>  static int io_setup_async_msg(struct io_kiocb *req,
> -			      struct io_async_msghdr *kmsg)
> +			      struct io_async_msghdr *kmsg,
> +			      unsigned int issue_flags)
>  {
>  	struct io_async_msghdr *async_msg = req->async_data;
> 
>  	if (async_msg)
>  		return -EAGAIN;
> -	if (io_alloc_async_data(req)) {
> +	async_msg = io_recvmsg_alloc_async(req, issue_flags);
> +	if (!async_msg) {
>  		kfree(kmsg->free_iov);
>  		return -ENOMEM;
>  	}
> -	async_msg = req->async_data;
>  	req->flags |= REQ_F_NEED_CLEANUP;
>  	memcpy(async_msg, kmsg, sizeof(*kmsg));
>  	async_msg->msg.msg_name = &async_msg->addr;
> @@ -195,7 +235,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
> 
>  	if (!(req->flags & REQ_F_POLLED) &&
>  	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
> -		return io_setup_async_msg(req, kmsg);
> +		return io_setup_async_msg(req, kmsg, issue_flags);
> 
>  	flags = sr->msg_flags;
>  	if (issue_flags & IO_URING_F_NONBLOCK)
> @@ -207,13 +247,13 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
> 
>  	if (ret < min_ret) {
>  		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
> -			return io_setup_async_msg(req, kmsg);
> +			return io_setup_async_msg(req, kmsg, issue_flags);
>  		if (ret == -ERESTARTSYS)
>  			ret = -EINTR;
>  		if (ret > 0 && io_net_retry(sock, flags)) {
>  			sr->done_io += ret;
>  			req->flags |= REQ_F_PARTIAL_IO;
> -			return io_setup_async_msg(req, kmsg);
> +			return io_setup_async_msg(req, kmsg, issue_flags);
>  		}
>  		req_set_fail(req);
>  	}
> @@ -221,6 +261,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
>  	if (kmsg->free_iov)
>  		kfree(kmsg->free_iov);
>  	req->flags &= ~REQ_F_NEED_CLEANUP;
> +	io_netmsg_recycle(req, issue_flags);
>  	if (ret >= 0)
>  		ret += sr->done_io;
>  	else if (sr->done_io)
> @@ -495,7 +536,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
> 
>  	if (!(req->flags & REQ_F_POLLED) &&
>  	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
> -		return io_setup_async_msg(req, kmsg);
> +		return io_setup_async_msg(req, kmsg, issue_flags);
> 
>  	if (io_do_buffer_select(req)) {
>  		void __user *buf;
> @@ -519,13 +560,13 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
>  	ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
>  	if (ret < min_ret) {
>  		if (ret == -EAGAIN && force_nonblock)
> -			return io_setup_async_msg(req, kmsg);
> +			return io_setup_async_msg(req, kmsg, issue_flags);
>  		if (ret == -ERESTARTSYS)
>  			ret = -EINTR;
>  		if (ret > 0 && io_net_retry(sock, flags)) {
>  			sr->done_io += ret;
>  			req->flags |= REQ_F_PARTIAL_IO;
> -			return io_setup_async_msg(req, kmsg);
> +			return io_setup_async_msg(req, kmsg, issue_flags);
>  		}
>  		req_set_fail(req);
>  	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
> @@ -535,6 +576,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
>  	/* fast path, check for non-NULL to avoid function call */
>  	if (kmsg->free_iov)
>  		kfree(kmsg->free_iov);
> +	io_netmsg_recycle(req, issue_flags);
>  	req->flags &= ~REQ_F_NEED_CLEANUP;
>  	if (ret > 0)
>  		ret += sr->done_io;
> @@ -848,4 +890,17 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags)
>  	io_req_set_res(req, ret, 0);
>  	return IOU_OK;
>  }
> +
> +void io_flush_netmsg_cache(struct io_ring_ctx *ctx)
> +{
> +	while (!hlist_empty(&ctx->netmsg_cache.list)) {
> +		struct io_async_msghdr *hdr;
> +
> +		hdr = hlist_entry(ctx->netmsg_cache.list.first,
> +				  struct io_async_msghdr, cache_list);
> +		hlist_del(&hdr->cache_list);
> +		kfree(hdr);
> +	}
> +	ctx->netmsg_cache.nr_cached = 0;
> +}

Again, this could be put somewhere common (see the flush sketch at the
end of this mail). I assume there will not be much more cleanup than a
simple kfree.

>  #endif
> diff --git a/io_uring/net.h b/io_uring/net.h
> index 81d71d164770..576efb602c7f 100644
> --- a/io_uring/net.h
> +++ b/io_uring/net.h
> @@ -5,7 +5,10 @@
> 
>  #if defined(CONFIG_NET)
>  struct io_async_msghdr {
> -	struct iovec			fast_iov[UIO_FASTIOV];
> +	union {
> +		struct iovec		fast_iov[UIO_FASTIOV];
> +		struct hlist_node	cache_list;
> +	};
>  	/* points to an allocated iov, if NULL we use fast_iov instead */
>  	struct iovec			*free_iov;
>  	struct sockaddr __user		*uaddr;
> @@ -40,4 +43,10 @@ int io_socket(struct io_kiocb *req, unsigned int issue_flags);
>  int io_connect_prep_async(struct io_kiocb *req);
>  int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
>  int io_connect(struct io_kiocb *req, unsigned int issue_flags);
> +
> +void io_flush_netmsg_cache(struct io_ring_ctx *ctx);
> +#else
> +static inline void io_flush_netmsg_cache(struct io_ring_ctx *ctx)
> +{
> +}
>  #endif
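For the flush side, maybe a common helper taking a per-cache free
callback. Again untested, io_alloc_cache_free() is just a name I made
up, and it assumes the union member becomes a struct io_cache_entry
named cache_entry as in the earlier sketch:

/* also alloc_cache.h material, building on the io_cache_entry above */
static inline void io_alloc_cache_free(struct io_alloc_cache *cache,
				       void (*free)(struct io_cache_entry *))
{
	while (!hlist_empty(&cache->list)) {
		struct hlist_node *node = cache->list.first;

		hlist_del(node);
		free(container_of(node, struct io_cache_entry, node));
	}
	cache->nr_cached = 0;
}

net.c would then only need to supply the type-specific cleanup, e.g.:

static void io_netmsg_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct io_async_msghdr, cache_entry));
}

and io_flush_netmsg_cache() collapses to a one-liner, with the apoll
cache flush able to share the same helper.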