On Thu, 6 Jan 2022 at 23:01:59 +0530, Praveen Kumar <kpraveen.lkml@xxxxxxxxx> wrote: > On 30-12-2021 23:22, Ammar Faizi wrote: >> This adds sendto(2) and recvfrom(2) support for io_uring. >> >> New opcodes: >> IORING_OP_SENDTO >> IORING_OP_RECVFROM >> >> Cc: Nugra <richiisei@xxxxxxxxx> >> Tested-by: Nugra <richiisei@xxxxxxxxx> >> Link: https://github.com/axboe/liburing/issues/397 >> Signed-off-by: Ammar Faizi <ammarfaizi2@xxxxxxxxx> >> --- >> >> v3: >> - Fix build error when CONFIG_NET is undefined should be done in >> the first patch, not this patch. >> >> - Add Tested-by tag from Nugra. >> >> v2: >> - In `io_recvfrom()`, mark the error check of `move_addr_to_user()` >> call as unlikely. >> >> - Fix build error when CONFIG_NET is undefined. >> >> - Added Nugra to CC list (tester). >> --- >> fs/io_uring.c | 80 +++++++++++++++++++++++++++++++++-- >> include/uapi/linux/io_uring.h | 2 + >> 2 files changed, 78 insertions(+), 4 deletions(-) >> >> diff --git a/fs/io_uring.c b/fs/io_uring.c >> index 7adcb591398f..3726958f8f58 100644 >> --- a/fs/io_uring.c >> +++ b/fs/io_uring.c >> @@ -575,7 +575,15 @@ struct io_sr_msg { >> union { >> struct compat_msghdr __user *umsg_compat; >> struct user_msghdr __user *umsg; >> - void __user *buf; >> + >> + struct { >> + void __user *buf; >> + struct sockaddr __user *addr; >> + union { >> + int sendto_addr_len; >> + int __user *recvfrom_addr_len; >> + }; >> + }; >> }; >> int msg_flags; >> int bgid; >> @@ -1133,6 +1141,19 @@ static const struct io_op_def io_op_defs[] = { >> .needs_file = 1 >> }, >> [IORING_OP_GETXATTR] = {}, >> + [IORING_OP_SENDTO] = { >> + .needs_file = 1, >> + .unbound_nonreg_file = 1, >> + .pollout = 1, >> + .audit_skip = 1, >> + }, >> + [IORING_OP_RECVFROM] = { >> + .needs_file = 1, >> + .unbound_nonreg_file = 1, >> + .pollin = 1, >> + .buffer_select = 1, >> + .audit_skip = 1, >> + }, >> }; >> >> /* requests with any of those set should undergo io_disarm_next() */ >> @@ -5216,12 +5237,24 @@ static int 
io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) >> if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) >> return -EINVAL; >> >> + /* >> + * For IORING_OP_SEND{,TO}, the assignment to @sr->umsg >> + * is equivalent to an assignment to @sr->buf. >> + */ >> sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); >> + >> sr->len = READ_ONCE(sqe->len); >> sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; >> if (sr->msg_flags & MSG_DONTWAIT) >> req->flags |= REQ_F_NOWAIT; >> >> + if (req->opcode == IORING_OP_SENDTO) { >> + sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); >> + sr->sendto_addr_len = READ_ONCE(sqe->addr3); >> + } else { >> + sr->addr = (struct sockaddr __user *) NULL; > > Let's have sendto_addr_len = 0 Will do in the RFC v5. > >> + } >> + >> #ifdef CONFIG_COMPAT >> if (req->ctx->compat) >> sr->msg_flags |= MSG_CMSG_COMPAT; >> @@ -5275,6 +5308,7 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) >> >> static int io_sendto(struct io_kiocb *req, unsigned int issue_flags) >> { >> + struct sockaddr_storage address; >> struct io_sr_msg *sr = &req->sr_msg; >> struct msghdr msg; >> struct iovec iov; >> @@ -5291,10 +5325,20 @@ static int io_sendto(struct io_kiocb *req, unsigned int issue_flags) >> if (unlikely(ret)) >> return ret; >> >> - msg.msg_name = NULL; >> + >> msg.msg_control = NULL; >> msg.msg_controllen = 0; >> - msg.msg_namelen = 0; >> + if (sr->addr) { >> + ret = move_addr_to_kernel(sr->addr, sr->sendto_addr_len, >> + &address); >> + if (unlikely(ret < 0)) >> + goto fail; >> + msg.msg_name = (struct sockaddr *) &address; >> + msg.msg_namelen = sr->sendto_addr_len; >> + } else { >> + msg.msg_name = NULL; >> + msg.msg_namelen = 0; >> + } >> >> flags = req->sr_msg.msg_flags; >> if (issue_flags & IO_URING_F_NONBLOCK) >> @@ -5309,6 +5353,7 @@ static int io_sendto(struct io_kiocb *req, unsigned int issue_flags) >> return -EAGAIN; >> if (ret == -ERESTARTSYS) >> ret = -EINTR; >> + fail: >> req_set_fail(req); > > I 
think there is a problem with "fail" goto statement. Not getting > full clarity on this change. With latest kernel, I see > req_set_fail(req) inside if check, which I don't see here. Can you > please resend the patch on latest kernel version. Thanks. I will send the v5 on top of the "for-next" branch in Jens' tree soon. The req_set_fail(req) call is already inside an "if check" in this patch as well; the "fail" label is placed inside that block. We go to that label when move_addr_to_kernel() fails (most of the time with -EFAULT or -EINVAL). That part looks like this (note the if check that guards the goto, and the if block that contains the label): ---------------------------------------------------------------------- msg.msg_control = NULL; msg.msg_controllen = 0; if (sr->addr) { ret = move_addr_to_kernel(sr->addr, sr->sendto_addr_len, &address); if (unlikely(ret < 0)) goto fail; msg.msg_name = (struct sockaddr *) &address; msg.msg_namelen = sr->sendto_addr_len; } else { msg.msg_name = NULL; msg.msg_namelen = 0; } flags = req->sr_msg.msg_flags; if (issue_flags & IO_URING_F_NONBLOCK) flags |= MSG_DONTWAIT; if (flags & MSG_WAITALL) min_ret = iov_iter_count(&msg.msg_iter); msg.msg_flags = flags; ret = sock_sendmsg(sock, &msg); if (ret < min_ret) { if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) return -EAGAIN; if (ret == -ERESTARTSYS) ret = -EINTR; fail: req_set_fail(req); } __io_req_complete(req, issue_flags, ret, 0); return 0; ---------------------------------------------------------------------- >> } >> __io_req_complete(req, issue_flags, ret, 0); >> @@ -5427,13 +5472,25 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) >> if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) >> return -EINVAL; >> >> + /* >> + * For IORING_OP_RECV{,FROM}, the assignment to @sr->umsg >> + * is equivalent to an assignment to @sr->buf. 
>> + */ >> sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); >> + >> sr->len = READ_ONCE(sqe->len); >> sr->bgid = READ_ONCE(sqe->buf_group); >> sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; >> if (sr->msg_flags & MSG_DONTWAIT) >> req->flags |= REQ_F_NOWAIT; >> >> + if (req->opcode == IORING_OP_RECVFROM) { >> + sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2)); >> + sr->recvfrom_addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr3)); >> + } else { >> + sr->addr = (struct sockaddr __user *) NULL; > > I think recvfrom_addr_len should also be pointed to NULL, instead of > garbage for this case. Will do in the RFC v5. > >> + } >> + >> #ifdef CONFIG_COMPAT >> if (req->ctx->compat) >> sr->msg_flags |= MSG_CMSG_COMPAT; >> @@ -5509,6 +5566,7 @@ static int io_recvfrom(struct io_kiocb *req, unsigned int issue_flags) >> struct iovec iov; >> unsigned flags; >> int ret, min_ret = 0; >> + struct sockaddr_storage address; >> bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; >> >> sock = sock_from_file(req->file); >> @@ -5526,7 +5584,7 @@ static int io_recvfrom(struct io_kiocb *req, unsigned int issue_flags) >> if (unlikely(ret)) >> goto out_free; >> >> - msg.msg_name = NULL; >> + msg.msg_name = sr->addr ? (struct sockaddr *) &address : NULL; >> msg.msg_control = NULL; >> msg.msg_controllen = 0; >> msg.msg_namelen = 0; > > I think namelen should also be updated ? msg.msg_namelen doesn't have to be updated here. In net/socket.c, __sys_recvfrom() has a comment like this: /* We assume all kernel code knows the size of sockaddr_storage */ msg.msg_namelen = 0; For the full __sys_recvfrom() source code, see: https://github.com/torvalds/linux/blob/v5.16-rc8/net/socket.c#L2085-L2088 I will add the same comment in the next series to clarify this. 
> >> @@ -5540,6 +5598,16 @@ static int io_recvfrom(struct io_kiocb *req, unsigned int issue_flags) >> min_ret = iov_iter_count(&msg.msg_iter); >> >> ret = sock_recvmsg(sock, &msg, flags); >> + >> + if (ret >= 0 && sr->addr != NULL) { >> + int tmp; >> + >> + tmp = move_addr_to_user(&address, msg.msg_namelen, sr->addr, >> + sr->recvfrom_addr_len); >> + if (unlikely(tmp < 0)) >> + ret = tmp; >> + } >> + >> out_free: >> if (ret < min_ret) { >> if (ret == -EAGAIN && force_nonblock) >> @@ -6778,9 +6846,11 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) >> case IORING_OP_SYNC_FILE_RANGE: >> return io_sfr_prep(req, sqe); >> case IORING_OP_SENDMSG: >> + case IORING_OP_SENDTO: >> case IORING_OP_SEND: >> return io_sendmsg_prep(req, sqe); >> case IORING_OP_RECVMSG: >> + case IORING_OP_RECVFROM: >> case IORING_OP_RECV: >> return io_recvmsg_prep(req, sqe); >> case IORING_OP_CONNECT: >> @@ -7060,12 +7130,14 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) >> case IORING_OP_SENDMSG: >> ret = io_sendmsg(req, issue_flags); >> break; >> + case IORING_OP_SENDTO: >> case IORING_OP_SEND: >> ret = io_sendto(req, issue_flags); >> break; >> case IORING_OP_RECVMSG: >> ret = io_recvmsg(req, issue_flags); >> break; >> + case IORING_OP_RECVFROM: >> case IORING_OP_RECV: >> ret = io_recvfrom(req, issue_flags); >> break; >> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h >> index efc7ac9b3a6b..a360069d1e8e 100644 >> --- a/include/uapi/linux/io_uring.h >> +++ b/include/uapi/linux/io_uring.h >> @@ -150,6 +150,8 @@ enum { >> IORING_OP_SETXATTR, >> IORING_OP_FGETXATTR, >> IORING_OP_GETXATTR, >> + IORING_OP_SENDTO, >> + IORING_OP_RECVFROM, >> >> /* this goes last, obviously */ >> IORING_OP_LAST, > > > Regards, > > ~Praveen. > Thanks for the review. I will send the RFC v5 soon. -- Ammar Faizi