On 2/21/25 20:51, David Wei wrote:
Currently only multishot recvzc requests are supported, but sometimes
there is a need to do a single recv e.g. peeking at some data in the
socket. Add single shot recvzc requests where IORING_RECV_MULTISHOT is
_not_ set and the sqe->len field is set to the number of bytes to read
N.
There is no oneshot; we need to change the message.
There could be multiple completions containing data, like the multishot
case, since N bytes could be split across multiple frags. This is
followed by a final completion with res and cflags both set to 0 that
indicates the completion of the request, or a -res that indicates an
error.
Signed-off-by: David Wei <dw@xxxxxxxxxxx>
---
io_uring/net.c | 19 +++++++++++++++++--
io_uring/zcrx.c | 17 ++++++++++++-----
io_uring/zcrx.h | 2 +-
3 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/io_uring/net.c b/io_uring/net.c
index 000dc70d08d0..cae34a24266c 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -94,6 +94,7 @@ struct io_recvzc {
struct file *file;
unsigned msg_flags;
u16 flags;
+ u32 len;
struct io_zcrx_ifq *ifq;
};
@@ -1241,7 +1242,7 @@ int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
unsigned ifq_idx;
if (unlikely(sqe->file_index || sqe->addr2 || sqe->addr ||
- sqe->len || sqe->addr3))
+ sqe->addr3))
return -EINVAL;
ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
@@ -1250,6 +1251,12 @@ int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
zc->ifq = req->ctx->ifq;
if (!zc->ifq)
return -EINVAL;
+ zc->len = READ_ONCE(sqe->len);
+ if (zc->len == UINT_MAX)
+ return -EINVAL;
The uapi gives u32; if we're using a special value, it should
match the type: ~(u32)0.
+ /* UINT_MAX means no limit on readlen */
+ if (!zc->len)
+ zc->len = UINT_MAX;
zc->flags = READ_ONCE(sqe->ioprio);
zc->msg_flags = READ_ONCE(sqe->msg_flags);
@@ -1269,6 +1276,7 @@ int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
+ bool limit = zc->len != UINT_MAX;
struct socket *sock;
int ret;
@@ -1281,7 +1289,7 @@ int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
return -ENOTSOCK;
ret = io_zcrx_recv(req, zc->ifq, sock, zc->msg_flags | MSG_DONTWAIT,
- issue_flags);
+ issue_flags, &zc->len);
if (unlikely(ret <= 0) && ret != -EAGAIN) {
if (ret == -ERESTARTSYS)
ret = -EINTR;
@@ -1296,6 +1304,13 @@ int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
return IOU_OK;
}
+ if (zc->len == 0) {
If len hits zero, we should always complete it, regardless
of errors the stack might have returned, so it might be
cleaner if you do that check right after io_zcrx_recv().
+ io_req_set_res(req, 0, 0);
+
+ if (issue_flags & IO_URING_F_MULTISHOT)
+ return IOU_STOP_MULTISHOT;
+ return IOU_OK;
+ }
if (issue_flags & IO_URING_F_MULTISHOT)
return IOU_ISSUE_SKIP_COMPLETE;
return -EAGAIN;
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index f2d326e18e67..74bca4e471bc 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -817,6 +817,7 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
int i, copy, end, off;
int ret = 0;
+ len = min_t(size_t, len, desc->count);
if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT))
return -EAGAIN;
@@ -894,26 +895,32 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
out:
if (offset == start_off)
return ret;
+ if (desc->count != UINT_MAX)
+ desc->count -= (offset - start_off);
I'd say just set desc->count to its max value (size_t), and
never care about checking for limits after.
return offset - start_off;
}
static int io_zcrx_tcp_recvmsg(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
struct sock *sk, int flags,
- unsigned issue_flags)
+ unsigned issue_flags, unsigned int *outlen)
{
+ unsigned int len = *outlen;
+ bool limit = len != UINT_MAX;
struct io_zcrx_args args = {
.req = req,
.ifq = ifq,
.sock = sk->sk_socket,
};
read_descriptor_t rd_desc = {
- .count = 1,
+ .count = len,
.arg.data = &args,
};
int ret;
lock_sock(sk);
ret = tcp_read_sock(sk, &rd_desc, io_zcrx_recv_skb);
+ if (limit && ret)
+ *outlen = len - ret;
if (ret <= 0) {
if (ret < 0 || sock_flag(sk, SOCK_DONE))
goto out;
@@ -930,7 +937,7 @@ static int io_zcrx_tcp_recvmsg(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
ret = IOU_REQUEUE;
} else if (sock_flag(sk, SOCK_DONE)) {
/* Make it to retry until it finally gets 0. */
- if (issue_flags & IO_URING_F_MULTISHOT)
+ if (!limit && (issue_flags & IO_URING_F_MULTISHOT))
ret = IOU_REQUEUE;
And with an earlier len check in net.c you don't need this change,
which feels wrong, as it's only here to circumvent some handling
in net.c, I assume.
--
Pavel Begunkov