On 12/20/23 16:27, Jens Axboe wrote:
On 12/19/23 2:03 PM, David Wei wrote:
diff --git a/io_uring/net.c b/io_uring/net.c
index 454ba301ae6b..7a2aadf6962c 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -637,7 +647,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
unsigned int cflags;
cflags = io_put_kbuf(req, issue_flags);
- if (msg->msg_inq && msg->msg_inq != -1)
+ if (msg && msg->msg_inq && msg->msg_inq != -1)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
@@ -652,7 +662,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
io_recv_prep_retry(req);
/* Known not-empty or unknown state, retry */
if (cflags & IORING_CQE_F_SOCK_NONEMPTY ||
- msg->msg_inq == -1)
+ (msg && msg->msg_inq == -1))
return false;
if (issue_flags & IO_URING_F_MULTISHOT)
*ret = IOU_ISSUE_SKIP_COMPLETE;
These are a bit ugly, just pass in a dummy msg for this?
+int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
+ struct socket *sock;
+ unsigned flags;
+ int ret, min_ret = 0;
+ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ struct io_zc_rx_ifq *ifq;
Eg
struct msghdr dummy_msg;
dummy_msg.msg_inq = -1;
which will eat some stack, but probably not really an issue.
+ if (issue_flags & IO_URING_F_UNLOCKED)
+ return -EAGAIN;
This seems odd, why? If we're called with IO_URING_F_UNLOCKED set, then
It's my addition, let me explain.
io_recvzc() -> io_zc_rx_recv() -> ... -> zc_rx_recv_frag()
This chain posts completions to a buffer completion queue, and
we don't want extra locking to share it with io-wq threads. In
some sense it's quite similar to the CQ locking, considering
we restrict zc to DEFER_TASKRUN. And it doesn't change anything
anyway because multishots cannot post completions from io-wq
and are executed from the poll callback in task work.
it's from io-wq. And returning -EAGAIN there will not do anything to
It will. It's supposed to just requeue for polling (it's not
IOPOLL to keep retrying -EAGAIN), just like multishots do.
Double checking the code, it can actually terminate the request,
which doesn't make much difference for us because multishots
should normally never end up in io-wq anyway, but I guess we
can improve it a little bit.
And it should also use IO_URING_F_IOWQ, forgot I split it
out from *F_UNLOCK.
change that. Usually this check is done to lock if we don't have it
already, eg with io_ring_submit_unlock(). Maybe I'm missing something
here!
@@ -590,5 +603,230 @@ const struct pp_memory_provider_ops io_uring_pp_zc_ops = {
};
EXPORT_SYMBOL(io_uring_pp_zc_ops);
+static inline struct io_uring_rbuf_cqe *io_zc_get_rbuf_cqe(struct io_zc_rx_ifq *ifq)
+{
+ struct io_uring_rbuf_cqe *cqe;
+ unsigned int cq_idx, queued, free, entries;
+ unsigned int mask = ifq->cq_entries - 1;
+
+ cq_idx = ifq->cached_cq_tail & mask;
+ smp_rmb();
+ queued = min(io_zc_rx_cqring_entries(ifq), ifq->cq_entries);
+ free = ifq->cq_entries - queued;
+ entries = min(free, ifq->cq_entries - cq_idx);
+ if (!entries)
+ return NULL;
+
+ cqe = &ifq->cqes[cq_idx];
+ ifq->cached_cq_tail++;
+ return cqe;
+}
smp_rmb() here needs a good comment on what the matching smp_wmb() is,
and why it's needed. Or maybe it should be an smp_load_acquire()?
+
+static int zc_rx_recv_frag(struct io_zc_rx_ifq *ifq, const skb_frag_t *frag,
+ int off, int len, unsigned sock_idx)
+{
+ off += skb_frag_off(frag);
+
+ if (likely(page_is_page_pool_iov(frag->bv_page))) {
+ struct io_uring_rbuf_cqe *cqe;
+ struct io_zc_rx_buf *buf;
+ struct page_pool_iov *ppiov;
+
+ ppiov = page_to_page_pool_iov(frag->bv_page);
+ if (ppiov->pp->p.memory_provider != PP_MP_IOU_ZCRX ||
+ ppiov->pp->mp_priv != ifq)
+ return -EFAULT;
+
+ cqe = io_zc_get_rbuf_cqe(ifq);
+ if (!cqe)
+ return -ENOBUFS;
+
+ buf = io_iov_to_buf(ppiov);
+ io_zc_rx_get_buf_uref(buf);
+
+ cqe->region = 0;
+ cqe->off = io_buf_pgid(ifq->pool, buf) * PAGE_SIZE + off;
+ cqe->len = len;
+ cqe->sock = sock_idx;
+ cqe->flags = 0;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ return len;
+}
I think this would read a lot better as:
if (unlikely(!page_is_page_pool_iov(frag->bv_page)))
return -EOPNOTSUPP;
That's a bit of oracle coding, this branch is implemented in
a later patch.
...
return len;
--
Pavel Begunkov