For bundles, the initial recv operation is always just a single buffer, as we don't yet know how much data is available in the socket. However, this can lead to a somewhat imbalanced string of receives, where the first recv gets a single buffer and the second gets a bunch. Allow the initial peek operation to get up to 4 buffers, taking advantage of the fact that there may be more data available, rather than just doing a single buffer. This has been shown to work well across a variety of recv workloads, as it's still cheap enough to do, while ensuring that we do get to amortize the cost of traversing the network stack and socket operations. Link: https://github.com/axboe/liburing/issues/1197 Fixes: 2f9c9515bdfd ("io_uring/net: support bundles for recv") Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index c95dc1736dd9..2c052996c9bf 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -209,6 +209,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, int nr_iovs = arg->nr_iovs; __u16 nr_avail, tail, head; struct io_uring_buf *buf; + int needed = 0; tail = smp_load_acquire(&br->tail); head = bl->head; @@ -218,19 +219,22 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, buf = io_ring_head_to_buf(br, head, bl->mask); if (arg->max_len) { - int needed; - needed = (arg->max_len + buf->len - 1) / buf->len; needed = min(needed, PEEK_MAX_IMPORT); - if (nr_avail > needed) - nr_avail = needed; + } else if (arg->max_vecs) { + needed = arg->max_vecs; } + if (nr_avail > needed) + nr_avail = needed; + /* - * only alloc a bigger array if we know we have data to map, eg not - * a speculative peek operation. + * Alloc a bigger array if we know we have data to map, or if a + * speculative peek operation tries to map more than what is + * available. 
*/ - if (arg->mode & KBUF_MODE_EXPAND && nr_avail > nr_iovs && arg->max_len) { + if (arg->mode & KBUF_MODE_EXPAND && nr_avail > nr_iovs && + (arg->max_len || arg->max_vecs)) { iov = kmalloc_array(nr_avail, sizeof(struct iovec), GFP_KERNEL); if (unlikely(!iov)) return -ENOMEM; @@ -238,7 +242,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, kfree(arg->iovs); arg->iovs = iov; nr_iovs = nr_avail; - } else if (nr_avail < nr_iovs) { + } else if (nr_iovs > nr_avail) { nr_iovs = nr_avail; } diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index b90aca3a57fa..8248ffda3a43 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -53,7 +53,8 @@ struct buf_sel_arg { size_t out_len; size_t max_len; int nr_iovs; - int mode; + unsigned short mode; + unsigned short max_vecs; }; void __user *io_buffer_select(struct io_kiocb *req, size_t *len, diff --git a/io_uring/net.c b/io_uring/net.c index 594490a1389b..48667f3a2388 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1076,8 +1076,14 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg arg.mode |= KBUF_MODE_FREE; } + /* + * Use the passed back residual if we have it, if not allow + * peeking of up to 4 buffers. + */ if (kmsg->msg.msg_inq > 0) arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq); + else + arg.max_vecs = 4; ret = io_buffers_peek(req, &arg); if (unlikely(ret < 0)) -- Jens Axboe