On 4/22/22 8:50 AM, Jens Axboe wrote: > On 4/13/22 4:33 AM, Avi Kivity wrote: >> Unfortunately, only ideas, no patches. But at least the first seems very easy. >> >> >> - IORING_OP_MEMCPY_IMMEDIATE - copy some payload included in the op >> itself (1-8 bytes) to a user memory location specified by the op. >> >> >> Linked to another op, this can generate an in-memory notification >> useful for busy-waiters or the UMWAIT instruction >> >> This would be useful for Seastar, which looks at a timer-managed >> memory location to check when to break computation loops. > > This one would indeed be trivial to do. If we limit the max size > supported to eg 8 bytes like suggested, then it could be in the sqe > itself and just copied to the user address specified. > > Eg have sqe->len be the length (1..8 bytes), sqe->addr the destination > address, and sqe->off the data to copy. > > If you'll commit to testing this, I can hack it up pretty quickly... Something like this, totally untested. Maybe the return value should be bytes copied? Just returns 0/error right now. Follows the above convention. diff --git a/fs/io_uring.c b/fs/io_uring.c index 2052a796436c..d2a95f9d9d2d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -586,6 +586,13 @@ struct io_socket { unsigned long nofile; }; +struct io_mem { + struct file *file; + u64 value; + void __user *dest; + u32 len; +}; + struct io_sync { struct file *file; loff_t len; @@ -962,6 +969,7 @@ struct io_kiocb { struct io_msg msg; struct io_xattr xattr; struct io_socket sock; + struct io_mem mem; }; u8 opcode; @@ -1231,16 +1239,19 @@ static const struct io_op_def io_op_defs[] = { .needs_file = 1, }, [IORING_OP_FSETXATTR] = { - .needs_file = 1 + .needs_file = 1, }, [IORING_OP_SETXATTR] = {}, [IORING_OP_FGETXATTR] = { - .needs_file = 1 + .needs_file = 1, }, [IORING_OP_GETXATTR] = {}, [IORING_OP_SOCKET] = { .audit_skip = 1, }, + [IORING_OP_MEMCPY_IMM] = { + .audit_skip = 1, + }, }; /* requests with any of those set should undergo io_disarm_next() */ @@ -5527,6 +5538,38 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags) return 0; } +static int io_memcpy_imm_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_mem *mem = &req->mem; + + if (unlikely(sqe->ioprio || sqe->rw_flags || sqe->buf_index || + sqe->splice_fd_in)) + return -EINVAL; + + mem->value = READ_ONCE(sqe->off); + mem->dest = u64_to_user_ptr(READ_ONCE(sqe->addr)); + mem->len = READ_ONCE(sqe->len); + if (!mem->len || mem->len > sizeof(u64)) + return -EINVAL; + + return 0; +} + +static int io_memcpy_imm(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_mem *mem = &req->mem; + int ret = 0; + + if (copy_to_user(mem->dest, &mem->value, mem->len)) + ret = -EFAULT; + + if (ret < 0) + req_set_fail(req); + io_req_complete(req, ret); + return 0; +} + #if defined(CONFIG_NET) static bool io_net_retry(struct socket *sock, int flags) { @@ -7494,6 +7537,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return io_getxattr_prep(req, sqe); case IORING_OP_SOCKET: return io_socket_prep(req, sqe); + case IORING_OP_MEMCPY_IMM: + return io_memcpy_imm_prep(req, sqe); } printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", @@ -7815,6 +7860,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) case IORING_OP_SOCKET: ret = io_socket(req, issue_flags); break; + case IORING_OP_MEMCPY_IMM: + ret = io_memcpy_imm(req, issue_flags); + break; default: ret = -EINVAL; break; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 5fb52bf32435..853f00a2bddd 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -152,6 +152,7 @@ enum { IORING_OP_FGETXATTR, IORING_OP_GETXATTR, IORING_OP_SOCKET, + IORING_OP_MEMCPY_IMM, /* this goes last, obviously */ IORING_OP_LAST, -- Jens Axboe