Currently, we can use registered buffers with send zerocopy but not
sendmsg. However, users want to use it with zc sendmsg as well, and pass
a scatter list into a registered buffer. Implement vectored registered
buffer support for sendmsg zerocopy.

The ABI should be intuitive. The user sets sqe->buf_index to the desired
registered buffer and also passes the IORING_RECVSEND_FIXED_BUF flag.
msghdr should still point to an iovec with user addresses, which are
used to calculate offsets, as has always been the case with registered
buffers. In other words, in most cases the user passes the same iovec
it'd pass to the non-registered buffer version.

This is a first step and requires some more work cleaning up the
infrastructure. It'll also need some improvement on the bvec caching
side. Note, we can easily enable it for the non-zc version, and even
extend the feature to read/write requests.

Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---
 io_uring/net.c | 47 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/io_uring/net.c b/io_uring/net.c
index bc062b5a7a55..6a19b6a7dc06 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -204,6 +204,18 @@ static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
 	return 0;
 }
 
+static void io_net_bvec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
+			       struct bio_vec *bvec, int max_segs)
+{
+	if (bvec) {
+		req->flags |= REQ_F_NEED_CLEANUP;
+		if (kmsg->free_vec)
+			kfree(kmsg->free_vec);
+		kmsg->free_vec_bytes = max_segs * sizeof(*bvec);
+		kmsg->free_vec = bvec;
+	}
+}
+
 static inline void io_mshot_prep_retry(struct io_kiocb *req,
 				       struct io_async_msghdr *kmsg)
 {
@@ -267,6 +279,31 @@ static int io_compat_msg_copy_hdr(struct io_kiocb *req,
 }
 #endif
 
+static int io_send_setup_sg_fixed(struct io_kiocb *req, struct iovec *iovec,
+				  int nr_iovs, int ddir)
+{
+	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+	struct io_async_msghdr *iomsg = req->async_data;
+	struct iov_iter *iter = &iomsg->msg.msg_iter;
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_mapped_ubuf *imu;
+	struct bio_vec *bvec;
+	int idx;
+
+	if (unlikely(sr->buf_index >= ctx->nr_user_bufs))
+		return -EFAULT;
+	idx = array_index_nospec(sr->buf_index, ctx->nr_user_bufs);
+	imu = READ_ONCE(ctx->user_bufs[idx]);
+	io_req_set_rsrc_node(sr->notif, ctx, 0);
+
+	bvec = io_import_fixed_vec(ddir, iter, imu, iovec, nr_iovs);
+	if (unlikely(IS_ERR(bvec)))
+		return PTR_ERR(bvec);
+
+	io_net_bvec_assign(req, iomsg, bvec, iter->nr_segs);
+	return 0;
+}
+
 static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
 			   struct user_msghdr *msg, int ddir)
 {
@@ -413,6 +450,14 @@ static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe
 	ret = io_sendmsg_copy_hdr(req, kmsg);
 	if (!ret)
 		req->flags |= REQ_F_NEED_CLEANUP;
+
+	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
+		struct iovec *iov;
+
+		iov = kmsg->free_vec ? kmsg->free_vec : &kmsg->fast_iov;
+		return io_send_setup_sg_fixed(req, iov,
+				kmsg->msg.msg_iter.nr_segs, ITER_SOURCE);
+	}
 	return ret;
 }
 
@@ -1270,8 +1315,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (req->opcode != IORING_OP_SEND_ZC) {
 		if (unlikely(sqe->addr2 || sqe->file_index))
 			return -EINVAL;
-		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
-			return -EINVAL;
 	}
 
 	zc->len = READ_ONCE(sqe->len);
-- 
2.46.0
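
P.S. For reviewers, a rough and untested userspace sketch of the intended
usage via liburing (not part of the patch). It assumes regbuf was already
registered with io_uring_register_buffers() at index buf_index and that
io_uring_prep_sendmsg_zc() is available; details may still change as the
series evolves.

#include <liburing.h>
#include <sys/socket.h>

/*
 * Hypothetical example: send two slices of an already-registered buffer
 * with zerocopy sendmsg. The iovec carries plain user addresses inside
 * the registered buffer, exactly as it would for the non-fixed path;
 * the kernel only uses them to compute offsets into the fixed buffer.
 */
static int sendmsg_zc_fixed(struct io_uring *ring, int sockfd,
			    char *regbuf, size_t len, unsigned buf_index)
{
	struct iovec iov[2] = {
		{ .iov_base = regbuf,           .iov_len = len / 2 },
		{ .iov_base = regbuf + len / 2, .iov_len = len - len / 2 },
	};
	struct msghdr msg = {
		.msg_iov = iov,
		.msg_iovlen = 2,
	};
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	if (!sqe)
		return -1;
	io_uring_prep_sendmsg_zc(sqe, sockfd, &msg, MSG_NOSIGNAL);
	/* zc send/recv flags are carried in sqe->ioprio */
	sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
	/* index of the registered buffer the iovec points into */
	sqe->buf_index = buf_index;
	return io_uring_submit(ring);
}

In most cases that iovec is exactly what would be passed without
IORING_RECVSEND_FIXED_BUF; only the flag and buf_index differ.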