Re: [RFC PATCH v3 15/20] io_uring: add io_recvzc request

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 12/20/23 16:27, Jens Axboe wrote:
On 12/19/23 2:03 PM, David Wei wrote:
diff --git a/io_uring/net.c b/io_uring/net.c
index 454ba301ae6b..7a2aadf6962c 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -637,7 +647,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
  	unsigned int cflags;
cflags = io_put_kbuf(req, issue_flags);
-	if (msg->msg_inq && msg->msg_inq != -1)
+	if (msg && msg->msg_inq && msg->msg_inq != -1)
  		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
@@ -652,7 +662,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
  			io_recv_prep_retry(req);
  			/* Known not-empty or unknown state, retry */
  			if (cflags & IORING_CQE_F_SOCK_NONEMPTY ||
-			    msg->msg_inq == -1)
+			    (msg && msg->msg_inq == -1))
  				return false;
  			if (issue_flags & IO_URING_F_MULTISHOT)
  				*ret = IOU_ISSUE_SKIP_COMPLETE;

These are a bit ugly, just pass in a dummy msg for this?

+int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
+	struct socket *sock;
+	unsigned flags;
+	int ret, min_ret = 0;
+	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+	struct io_zc_rx_ifq *ifq;

Eg
	struct msghdr dummy_msg;

	dummy_msg.msg_inq = -1;

which will eat some stack, but probably not really an issue.


+	if (issue_flags & IO_URING_F_UNLOCKED)
+		return -EAGAIN;

This seems odd, why? If we're called with IO_URING_F_UNLOCKED set, then

It's my addition, let me explain.

io_recvzc() -> io_zc_rx_recv() -> ... -> zc_rx_recv_frag()

This chain posts completions to a buffer completion queue, and
we don't want extra locking to share it with io-wq threads. In
some sense it's quite similar to the CQ locking, considering
we restrict zc to DEFER_TASKRUN. And doesn't change anything
anyway because multishot cannot post completions from io-wq
and are executed from the poll callback in task work.

it's from io-wq. And returning -EAGAIN there will not do anything to

It will. It's supposed to just requeue for polling (it's not
IOPOLL to keep retrying -EAGAIN), just like multishots do.

Double checking the code, it can actually terminate the request,
which doesn't make much difference for us because multishots
should normally never end up in io-wq anyway, but I guess we
can improve it a liitle bit.

And it should also use IO_URING_F_IOWQ, forgot I split out
it from *F_UNLOCK.

change that. Usually this check is done to lock if we don't have it
already, eg with io_ring_submit_unlock(). Maybe I'm missing something
here!

@@ -590,5 +603,230 @@ const struct pp_memory_provider_ops io_uring_pp_zc_ops = {
  };
  EXPORT_SYMBOL(io_uring_pp_zc_ops);
+static inline struct io_uring_rbuf_cqe *io_zc_get_rbuf_cqe(struct io_zc_rx_ifq *ifq)
+{
+	struct io_uring_rbuf_cqe *cqe;
+	unsigned int cq_idx, queued, free, entries;
+	unsigned int mask = ifq->cq_entries - 1;
+
+	cq_idx = ifq->cached_cq_tail & mask;
+	smp_rmb();
+	queued = min(io_zc_rx_cqring_entries(ifq), ifq->cq_entries);
+	free = ifq->cq_entries - queued;
+	entries = min(free, ifq->cq_entries - cq_idx);
+	if (!entries)
+		return NULL;
+
+	cqe = &ifq->cqes[cq_idx];
+	ifq->cached_cq_tail++;
+	return cqe;
+}

smp_rmb() here needs a good comment on what the matching smp_wmb() is,
and why it's needed. Or maybe it should be an smp_load_acquire()?

+
+static int zc_rx_recv_frag(struct io_zc_rx_ifq *ifq, const skb_frag_t *frag,
+			   int off, int len, unsigned sock_idx)
+{
+	off += skb_frag_off(frag);
+
+	if (likely(page_is_page_pool_iov(frag->bv_page))) {
+		struct io_uring_rbuf_cqe *cqe;
+		struct io_zc_rx_buf *buf;
+		struct page_pool_iov *ppiov;
+
+		ppiov = page_to_page_pool_iov(frag->bv_page);
+		if (ppiov->pp->p.memory_provider != PP_MP_IOU_ZCRX ||
+		    ppiov->pp->mp_priv != ifq)
+			return -EFAULT;
+
+		cqe = io_zc_get_rbuf_cqe(ifq);
+		if (!cqe)
+			return -ENOBUFS;
+
+		buf = io_iov_to_buf(ppiov);
+		io_zc_rx_get_buf_uref(buf);
+
+		cqe->region = 0;
+		cqe->off = io_buf_pgid(ifq->pool, buf) * PAGE_SIZE + off;
+		cqe->len = len;
+		cqe->sock = sock_idx;
+		cqe->flags = 0;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	return len;
+}

I think this would read a lot better as:

	if (unlikely(!page_is_page_pool_iov(frag->bv_page)))
		return -EOPNOTSUPP;

That's a bit of oracle coding, this branch is implemented in
a later patch.


	...
	return len;


--
Pavel Begunkov




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux