Re: [RFC PATCH v3 3/3] io_uring: Add `sendto(2)` and `recvfrom(2)` support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 1/7/22 3:33 PM, Praveen Kumar wrote:
On 07-01-2022 02:08, Ammar Faizi wrote:
On Thu, 6 Jan 2022 at 23:01:59 +0530, Praveen Kumar <kpraveen.lkml@xxxxxxxxx> wrote:
On 30-12-2021 23:22, Ammar Faizi wrote:
This adds sendto(2) and recvfrom(2) support for io_uring.

New opcodes:
   IORING_OP_SENDTO
   IORING_OP_RECVFROM

Cc: Nugra <richiisei@xxxxxxxxx>
Tested-by: Nugra <richiisei@xxxxxxxxx>
Link: https://github.com/axboe/liburing/issues/397
Signed-off-by: Ammar Faizi <ammarfaizi2@xxxxxxxxx>
---

v3:
   - Fix build error when CONFIG_NET is undefined should be done in
     the first patch, not this patch.

   - Add Tested-by tag from Nugra.

v2:
   - In `io_recvfrom()`, mark the error check of `move_addr_to_user()`
     call as unlikely.

   - Fix build error when CONFIG_NET is undefined.

   - Added Nugra to CC list (tester).
---
  fs/io_uring.c                 | 80 +++++++++++++++++++++++++++++++++--
  include/uapi/linux/io_uring.h |  2 +
  2 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7adcb591398f..3726958f8f58 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -575,7 +575,15 @@ struct io_sr_msg {
  	union {
  		struct compat_msghdr __user	*umsg_compat;
  		struct user_msghdr __user	*umsg;
-		void __user			*buf;
+
+		struct {
+			void __user		*buf;
+			struct sockaddr __user	*addr;
+			union {
+				int		sendto_addr_len;
+				int __user	*recvfrom_addr_len;
+			};
+		};
  	};
  	int				msg_flags;
  	int				bgid;
@@ -1133,6 +1141,19 @@ static const struct io_op_def io_op_defs[] = {
  		.needs_file = 1
  	},
  	[IORING_OP_GETXATTR] = {},
+	[IORING_OP_SENDTO] = {
+		.needs_file		= 1,
+		.unbound_nonreg_file	= 1,
+		.pollout		= 1,
+		.audit_skip		= 1,
+	},
+	[IORING_OP_RECVFROM] = {
+		.needs_file		= 1,
+		.unbound_nonreg_file	= 1,
+		.pollin			= 1,
+		.buffer_select		= 1,
+		.audit_skip		= 1,
+	},
  };
/* requests with any of those set should undergo io_disarm_next() */
@@ -5216,12 +5237,24 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
  		return -EINVAL;
+ /*
+	 * For IORING_OP_SEND{,TO}, the assignment to @sr->umsg
+	 * is equivalent to an assignment to @sr->buf.
+	 */
  	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+
  	sr->len = READ_ONCE(sqe->len);
  	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
  	if (sr->msg_flags & MSG_DONTWAIT)
  		req->flags |= REQ_F_NOWAIT;
+ if (req->opcode == IORING_OP_SENDTO) {
+		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+		sr->sendto_addr_len = READ_ONCE(sqe->addr3);
+	} else {
+		sr->addr = (struct sockaddr __user *) NULL;

Let's have sendto_addr_len  = 0

Will do in the RFC v5.


+	}
+
  #ifdef CONFIG_COMPAT
  	if (req->ctx->compat)
  		sr->msg_flags |= MSG_CMSG_COMPAT;
@@ -5275,6 +5308,7 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
static int io_sendto(struct io_kiocb *req, unsigned int issue_flags)
  {
+	struct sockaddr_storage address;
  	struct io_sr_msg *sr = &req->sr_msg;
  	struct msghdr msg;
  	struct iovec iov;
@@ -5291,10 +5325,20 @@ static int io_sendto(struct io_kiocb *req, unsigned int issue_flags)
  	if (unlikely(ret))
  		return ret;
- msg.msg_name = NULL;
+
  	msg.msg_control = NULL;
  	msg.msg_controllen = 0;
-	msg.msg_namelen = 0;
+	if (sr->addr) {
+		ret = move_addr_to_kernel(sr->addr, sr->sendto_addr_len,
+					  &address);
+		if (unlikely(ret < 0))
+			goto fail;
+		msg.msg_name = (struct sockaddr *) &address;
+		msg.msg_namelen = sr->sendto_addr_len;
+	} else {
+		msg.msg_name = NULL;
+		msg.msg_namelen = 0;
+	}
flags = req->sr_msg.msg_flags;
  	if (issue_flags & IO_URING_F_NONBLOCK)
@@ -5309,6 +5353,7 @@ static int io_sendto(struct io_kiocb *req, unsigned int issue_flags)
  			return -EAGAIN;
  		if (ret == -ERESTARTSYS)
  			ret = -EINTR;
+	fail:
  		req_set_fail(req);

I think there is a problem with "fail" goto statement. Not getting
full clarity on this change. With latest kernel, I see
req_set_fail(req) inside if check, which I don't see here. Can you
please resend the patch on latest kernel version. Thanks.

I will send the v5 on top of "for-next" branch in Jens' tree soon.

That is already inside an "if check" anyway. We go to that label when
the move_addr_to_kernel() fails (most of the time it is -EFAULT or
-EINVAL).

That part looks like this (note the if check before the goto):
----------------------------------------------------------------------
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	if (sr->addr) {
		ret = move_addr_to_kernel(sr->addr, sr->sendto_addr_len,
					  &address);
		if (unlikely(ret < 0))
			goto fail;
		msg.msg_name = (struct sockaddr *) &address;
		msg.msg_namelen = sr->sendto_addr_len;
	} else {
		msg.msg_name = NULL;
		msg.msg_namelen = 0;
	}

	flags = req->sr_msg.msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
	fail:

Thanks for sending this. IMO this goto label should be just before
the "if (ret < min_ret)" statement.

AFAICT, error returned by move_addr_to_kernel are only -EFAULT and -EINVAL,
so the check against -EAGAIN and -ERESTARTSYS is unnecessary for that
case. We can skip that.

RFC v4 here (rebased on top of "for-next" Jens' tree):
https://lore.kernel.org/io-uring/20220107000006.1194026-1-ammarfaizi2@xxxxxxxxxxx/T/


		req_set_fail(req);
	}
	__io_req_complete(req, issue_flags, ret, 0);
	return 0;
----------------------------------------------------------------------

  	}
  	__io_req_complete(req, issue_flags, ret, 0);
@@ -5427,13 +5472,25 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
  		return -EINVAL;
+ /*
+	 * For IORING_OP_RECV{,FROM}, the assignment to @sr->umsg
+	 * is equivalent to an assignment to @sr->buf.
+	 */
  	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+
  	sr->len = READ_ONCE(sqe->len);
  	sr->bgid = READ_ONCE(sqe->buf_group);
  	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
  	if (sr->msg_flags & MSG_DONTWAIT)
  		req->flags |= REQ_F_NOWAIT;
+ if (req->opcode == IORING_OP_RECVFROM) {
+		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+		sr->recvfrom_addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr3));
+	} else {
+		sr->addr = (struct sockaddr __user *) NULL;

I think recvfrom_addr_len should also be pointed to NULL, instead of
garbage for this case.

Will do in the RFC v5.


+	}
+
  #ifdef CONFIG_COMPAT
  	if (req->ctx->compat)
  		sr->msg_flags |= MSG_CMSG_COMPAT;
@@ -5509,6 +5566,7 @@ static int io_recvfrom(struct io_kiocb *req, unsigned int issue_flags)
  	struct iovec iov;
  	unsigned flags;
  	int ret, min_ret = 0;
+	struct sockaddr_storage address;
  	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
sock = sock_from_file(req->file);
@@ -5526,7 +5584,7 @@ static int io_recvfrom(struct io_kiocb *req, unsigned int issue_flags)
  	if (unlikely(ret))
  		goto out_free;
- msg.msg_name = NULL;
+	msg.msg_name = sr->addr ? (struct sockaddr *) &address : NULL;
  	msg.msg_control = NULL;
  	msg.msg_controllen = 0;
  	msg.msg_namelen = 0;

I think namelen should also be updated ?

It doesn't have to be updated. From net/socket.c there is a comment
like this:

	/* We assume all kernel code knows the size of sockaddr_storage */
	msg.msg_namelen = 0;

Full __sys_recvfrom() source code, see here:
https://github.com/torvalds/linux/blob/v5.16-rc8/net/socket.c#L2085-L2088

I will add the same comment in next series to clarify this one.


@@ -5540,6 +5598,16 @@ static int io_recvfrom(struct io_kiocb *req, unsigned int issue_flags)
  		min_ret = iov_iter_count(&msg.msg_iter);
ret = sock_recvmsg(sock, &msg, flags);
+
+	if (ret >= 0 && sr->addr != NULL) {
+		int tmp;
+
+		tmp = move_addr_to_user(&address, msg.msg_namelen, sr->addr,
+					sr->recvfrom_addr_len);
+		if (unlikely(tmp < 0))
+			ret = tmp;
+	}
+
  out_free:
  	if (ret < min_ret) {
  		if (ret == -EAGAIN && force_nonblock)
@@ -6778,9 +6846,11 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  	case IORING_OP_SYNC_FILE_RANGE:
  		return io_sfr_prep(req, sqe);
  	case IORING_OP_SENDMSG:
+	case IORING_OP_SENDTO:
  	case IORING_OP_SEND:
  		return io_sendmsg_prep(req, sqe);
  	case IORING_OP_RECVMSG:
+	case IORING_OP_RECVFROM:
  	case IORING_OP_RECV:
  		return io_recvmsg_prep(req, sqe);
  	case IORING_OP_CONNECT:
@@ -7060,12 +7130,14 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
  	case IORING_OP_SENDMSG:
  		ret = io_sendmsg(req, issue_flags);
  		break;
+	case IORING_OP_SENDTO:
  	case IORING_OP_SEND:
  		ret = io_sendto(req, issue_flags);
  		break;
  	case IORING_OP_RECVMSG:
  		ret = io_recvmsg(req, issue_flags);
  		break;
+	case IORING_OP_RECVFROM:
  	case IORING_OP_RECV:
  		ret = io_recvfrom(req, issue_flags);
  		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index efc7ac9b3a6b..a360069d1e8e 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -150,6 +150,8 @@ enum {
  	IORING_OP_SETXATTR,
  	IORING_OP_FGETXATTR,
  	IORING_OP_GETXATTR,
+	IORING_OP_SENDTO,
+	IORING_OP_RECVFROM,
/* this goes last, obviously */
  	IORING_OP_LAST,


Regards,

~Praveen.


Thanks for the review. I will send the RFC v5 soon.



--
Ammar Faizi

Attachment: OpenPGP_0x364FBA34FF170A4B.asc
Description: OpenPGP public key

Attachment: OpenPGP_signature
Description: OpenPGP digital signature


[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux