On Fri, 7 Jan 2022 at 03:38:50 +0700, Ammar Faizi <ammarfaizi2@xxxxxxxxxxx> wrote: > On Thu, 6 Jan 2022 at 23:01:59 +0530, Praveen Kumar <kpraveen.lkml@xxxxxxxxx> wrote: >> On 30-12-2021 23:22, Ammar Faizi wrote: >>> This adds sendto(2) and recvfrom(2) support for io_uring. >>> >>> New opcodes: >>> IORING_OP_SENDTO >>> IORING_OP_RECVFROM >>> >>> Cc: Nugra <richiisei@xxxxxxxxx> >>> Tested-by: Nugra <richiisei@xxxxxxxxx> >>> Link: https://github.com/axboe/liburing/issues/397 >>> Signed-off-by: Ammar Faizi <ammarfaizi2@xxxxxxxxx> >>> --- >>> >>> v3: >>> - Fix build error when CONFIG_NET is undefined should be done in >>> the first patch, not this patch. >>> >>> - Add Tested-by tag from Nugra. >>> >>> v2: >>> - In `io_recvfrom()`, mark the error check of `move_addr_to_user()` >>> call as unlikely. >>> >>> - Fix build error when CONFIG_NET is undefined. >>> >>> - Added Nugra to CC list (tester). >>> --- >>> fs/io_uring.c | 80 +++++++++++++++++++++++++++++++++-- >>> include/uapi/linux/io_uring.h | 2 + >>> 2 files changed, 78 insertions(+), 4 deletions(-) >>> >>> diff --git a/fs/io_uring.c b/fs/io_uring.c >>> index 7adcb591398f..3726958f8f58 100644 >>> --- a/fs/io_uring.c >>> +++ b/fs/io_uring.c >>> @@ -575,7 +575,15 @@ struct io_sr_msg { >>> union { >>> struct compat_msghdr __user *umsg_compat; >>> struct user_msghdr __user *umsg; >>> - void __user *buf; >>> + >>> + struct { >>> + void __user *buf; >>> + struct sockaddr __user *addr; >>> + union { >>> + int sendto_addr_len; >>> + int __user *recvfrom_addr_len; >>> + }; >>> + }; >>> }; >>> int msg_flags; >>> int bgid; >>> @@ -1133,6 +1141,19 @@ static const struct io_op_def io_op_defs[] = { >>> .needs_file = 1 >>> }, >>> [IORING_OP_GETXATTR] = {}, >>> + [IORING_OP_SENDTO] = { >>> + .needs_file = 1, >>> + .unbound_nonreg_file = 1, >>> + .pollout = 1, >>> + .audit_skip = 1, >>> + }, >>> + [IORING_OP_RECVFROM] = { >>> + .needs_file = 1, >>> + .unbound_nonreg_file = 1, >>> + .pollin = 1, >>> + .buffer_select = 1, >>> + .audit_skip = 1, >>> + }, >>> }; >>> >>> /* requests with any of those set should undergo io_disarm_next() */ >>> @@ -5216,12 +5237,24 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) >>> if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) >>> return -EINVAL; >>> >>> + /* >>> + * For IORING_OP_SEND{,TO}, the assignment to @sr->umsg >>> + * is equivalent to an assignment to @sr->buf. >>> + */ >>> sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); >>> + >>> sr->len = READ_ONCE(sqe->len); >>> sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; >>> if (sr->msg_flags & MSG_DONTWAIT) >>> req->flags |= REQ_F_NOWAIT; >>> >>> + if (req->opcode == IORING_OP_SENDTO) { >>> + sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); >>> + sr->sendto_addr_len = READ_ONCE(sqe->addr3); >>> + } else { >>> + sr->addr = (struct sockaddr __user *) NULL; >> >> Let's have sendto_addr_len = 0 > > Will do in the RFC v5. > >> >>> + } >>> + >>> #ifdef CONFIG_COMPAT >>> if (req->ctx->compat) >>> sr->msg_flags |= MSG_CMSG_COMPAT; >>> @@ -5275,6 +5308,7 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) >>> >>> static int io_sendto(struct io_kiocb *req, unsigned int issue_flags) >>> { >>> + struct sockaddr_storage address; >>> struct io_sr_msg *sr = &req->sr_msg; >>> struct msghdr msg; >>> struct iovec iov; >>> @@ -5291,10 +5325,20 @@ static int io_sendto(struct io_kiocb *req, unsigned int issue_flags) >>> if (unlikely(ret)) >>> return ret; >>> >>> - msg.msg_name = NULL; >>> + >>> msg.msg_control = NULL; >>> msg.msg_controllen = 0; >>> - msg.msg_namelen = 0; >>> + if (sr->addr) { >>> + ret = move_addr_to_kernel(sr->addr, sr->sendto_addr_len, >>> + &address); >>> + if (unlikely(ret < 0)) >>> + goto fail; >>> + msg.msg_name = (struct sockaddr *) &address; >>> + msg.msg_namelen = sr->sendto_addr_len; >>> + } else { >>> + msg.msg_name = NULL; >>> + msg.msg_namelen = 0; >>> + } >>> >>> flags = req->sr_msg.msg_flags; >>> if (issue_flags & IO_URING_F_NONBLOCK) >>> @@ -5309,6 +5353,7 @@ static int io_sendto(struct io_kiocb *req, unsigned int issue_flags) >>> return -EAGAIN; >>> if (ret == -ERESTARTSYS) >>> ret = -EINTR; >>> + fail: >>> req_set_fail(req); >> >> I think there is a problem with "fail" goto statement. Not getting >> full clarity on this change. With latest kernel, I see >> req_set_fail(req) inside if check, which I don't see here. Can you >> please resend the patch on latest kernel version. Thanks. > > I will send the v5 on top of "for-next" branch in Jens' tree soon. > > That is already inside an "if check" anyway. We go to that label when > the move_addr_to_kernel() fails (most of the time it is -EFAULT or > -EINVAL). > > That part looks like this (note the if check before the goto): > ---------------------------------------------------------------------- > msg.msg_control = NULL; > msg.msg_controllen = 0; > if (sr->addr) { > ret = move_addr_to_kernel(sr->addr, sr->sendto_addr_len, > &address); > if (unlikely(ret < 0)) > goto fail; > msg.msg_name = (struct sockaddr *) &address; > msg.msg_namelen = sr->sendto_addr_len; > } else { > msg.msg_name = NULL; > msg.msg_namelen = 0; > } > > flags = req->sr_msg.msg_flags; > if (issue_flags & IO_URING_F_NONBLOCK) > flags |= MSG_DONTWAIT; > if (flags & MSG_WAITALL) > min_ret = iov_iter_count(&msg.msg_iter); > > msg.msg_flags = flags; > ret = sock_sendmsg(sock, &msg); > if (ret < min_ret) { > if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) > return -EAGAIN; > if (ret == -ERESTARTSYS) > ret = -EINTR; > fail: > req_set_fail(req); > } > __io_req_complete(req, issue_flags, ret, 0); > return 0; > ---------------------------------------------------------------------- > >>> } >>> __io_req_complete(req, issue_flags, ret, 0); >>> @@ -5427,13 +5472,25 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) >>> if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) >>> return -EINVAL; >>> >>> + /* >>> + * For IORING_OP_RECV{,FROM}, the assignment to @sr->umsg >>> + * is equivalent to an assignment to @sr->buf. >>> + */ >>> sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); >>> + >>> sr->len = READ_ONCE(sqe->len); >>> sr->bgid = READ_ONCE(sqe->buf_group); >>> sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; >>> if (sr->msg_flags & MSG_DONTWAIT) >>> req->flags |= REQ_F_NOWAIT; >>> >>> + if (req->opcode == IORING_OP_RECVFROM) { >>> + sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); >>> + sr->recvfrom_addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr3)); >>> + } else { >>> + sr->addr = (struct sockaddr __user *) NULL; >> >> I think recvfrom_addr_len should also be pointed to NULL, instead of >> garbage for this case. > > Will do in the RFC v5. > >> >>> + } >>> + >>> #ifdef CONFIG_COMPAT >>> if (req->ctx->compat) >>> sr->msg_flags |= MSG_CMSG_COMPAT; >>> @@ -5509,6 +5566,7 @@ static int io_recvfrom(struct io_kiocb *req, unsigned int issue_flags) >>> struct iovec iov; >>> unsigned flags; >>> int ret, min_ret = 0; >>> + struct sockaddr_storage address; >>> bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; >>> >>> sock = sock_from_file(req->file); >>> @@ -5526,7 +5584,7 @@ static int io_recvfrom(struct io_kiocb *req, unsigned int issue_flags) >>> if (unlikely(ret)) >>> goto out_free; >>> >>> - msg.msg_name = NULL; >>> + msg.msg_name = sr->addr ? (struct sockaddr *) &address : NULL; >>> msg.msg_control = NULL; >>> msg.msg_controllen = 0; >>> msg.msg_namelen = 0; >> >> I think namelen should also be updated ? > > It doesn't have to be updated. From net/socket.c there is a comment > like this: > > /* We assume all kernel code knows the size of sockaddr_storage */ > msg.msg_namelen = 0; > > Full __sys_recvfrom() source code, see here: > https://github.com/torvalds/linux/blob/v5.16-rc8/net/socket.c#L2085-L2088 > > I will add the same comment in next series to clarify this one. > >> >>> @@ -5540,6 +5598,16 @@ static int io_recvfrom(struct io_kiocb *req, unsigned int issue_flags) >>> min_ret = iov_iter_count(&msg.msg_iter); >>> >>> ret = sock_recvmsg(sock, &msg, flags); >>> + >>> + if (ret >= 0 && sr->addr != NULL) { >>> + int tmp; >>> + >>> + tmp = move_addr_to_user(&address, msg.msg_namelen, sr->addr, >>> + sr->recvfrom_addr_len); >>> + if (unlikely(tmp < 0)) >>> + ret = tmp; >>> + } >>> + >>> out_free: >>> if (ret < min_ret) { >>> if (ret == -EAGAIN && force_nonblock) >>> @@ -6778,9 +6846,11 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) >>> case IORING_OP_SYNC_FILE_RANGE: >>> return io_sfr_prep(req, sqe); >>> case IORING_OP_SENDMSG: >>> + case IORING_OP_SENDTO: >>> case IORING_OP_SEND: >>> return io_sendmsg_prep(req, sqe); >>> case IORING_OP_RECVMSG: >>> + case IORING_OP_RECVFROM: >>> case IORING_OP_RECV: >>> return io_recvmsg_prep(req, sqe); >>> case IORING_OP_CONNECT: >>> @@ -7060,12 +7130,14 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) >>> case IORING_OP_SENDMSG: >>> ret = io_sendmsg(req, issue_flags); >>> break; >>> + case IORING_OP_SENDTO: >>> case IORING_OP_SEND: >>> ret = io_sendto(req, issue_flags); >>> break; >>> case IORING_OP_RECVMSG: >>> ret = io_recvmsg(req, issue_flags); >>> break; >>> + case IORING_OP_RECVFROM: >>> case IORING_OP_RECV: >>> ret = io_recvfrom(req, issue_flags); >>> break; >>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h >>> index efc7ac9b3a6b..a360069d1e8e 100644 >>> --- a/include/uapi/linux/io_uring.h >>> +++ b/include/uapi/linux/io_uring.h >>> @@ -150,6 +150,8 @@ enum { >>> IORING_OP_SETXATTR, >>> IORING_OP_FGETXATTR, >>> IORING_OP_GETXATTR, >>> + IORING_OP_SENDTO, >>> + IORING_OP_RECVFROM, >>> >>> /* this goes last, obviously */ >>> IORING_OP_LAST, >> >> >> Regards, >> >> ~Praveen. >> > > Thanks for the review. I will send the RFC v5 soon. > > -- > Ammar Faizi s/v5/v4/g -- Ammar Faizi