On 4/23/22 11:32 AM, Jens Axboe wrote:
>> I guess copy_to_user saves us from having to consider endianness.
>
> I was considering that too, definitely something that should be
> investigated. Making it a 1/2/4/8 switch and using put_user() is
> probably a better idea. Easy enough to benchmark.

FWIW, this is the current version. Some quick benchmarking doesn't show
any difference between copy_to_user and put_user, but that may depend on
the arch as well (using aarch64). But we might as well use put user and
combine it with the length check, so we explicitly only support 1/2/4/8
sizes.


diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2052a796436c..3b94cb4b67ed 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -586,6 +586,14 @@ struct io_socket {
 	unsigned long			nofile;
 };
 
+struct io_mem {
+	struct file			*file;
+	u64				value;
+	void __user			*dest;
+	u32				len;
+	u32				flags;
+};
+
 struct io_sync {
 	struct file			*file;
 	loff_t				len;
@@ -962,6 +970,7 @@ struct io_kiocb {
 		struct io_msg		msg;
 		struct io_xattr		xattr;
 		struct io_socket	sock;
+		struct io_mem		mem;
 	};
 
 	u8				opcode;
@@ -1231,16 +1240,19 @@ static const struct io_op_def io_op_defs[] = {
 		.needs_file = 1,
 	},
 	[IORING_OP_FSETXATTR] = {
-		.needs_file = 1
+		.needs_file = 1,
 	},
 	[IORING_OP_SETXATTR] = {},
 	[IORING_OP_FGETXATTR] = {
-		.needs_file = 1
+		.needs_file = 1,
 	},
 	[IORING_OP_GETXATTR] = {},
 	[IORING_OP_SOCKET] = {
 		.audit_skip = 1,
 	},
+	[IORING_OP_MEMCPY] = {
+		.audit_skip = 1,
+	},
 };
 
 /* requests with any of those set should undergo io_disarm_next() */
@@ -5527,6 +5539,82 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
 	return 0;
 }
 
+/*
+ * Prepare IORING_OP_MEMCPY: read the immediate value ('off'), the user
+ * destination ('addr'), the store size ('len') and the flags from the SQE.
+ * Sizes outside 1..8 and non-immediate mode are rejected with -EINVAL.
+ */
+static int io_memcpy_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_mem *mem = &req->mem;
+
+	if (unlikely(sqe->ioprio || sqe->buf_index || sqe->splice_fd_in))
+		return -EINVAL;
+
+	mem->value = READ_ONCE(sqe->off);
+	mem->dest = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	mem->len = READ_ONCE(sqe->len);
+	if (!mem->len || mem->len > sizeof(u64))
+		return -EINVAL;
+
+	mem->flags = READ_ONCE(sqe->memcpy_flags);
+	if (mem->flags & ~IORING_MEMCPY_IMM)
+		return -EINVAL;
+
+	/* only supports immediate mode for now */
+	if (!(mem->flags & IORING_MEMCPY_IMM))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Store the low 'len' bytes of the immediate value to the user destination
+ * with put_user(). Only 1/2/4/8 byte stores are supported; the CQE result is
+ * the number of bytes written, -EFAULT if the store faulted, or -EINVAL for
+ * any other length.
+ */
+static int io_memcpy(struct io_kiocb *req)
+{
+	struct io_mem *mem = &req->mem;
+	int ret = mem->len;
+
+	switch (mem->len) {
+	case 1: {
+		u8 val = mem->value;
+		if (put_user(val, (u8 __user *) mem->dest))
+			ret = -EFAULT;
+		break;
+	}
+	case 2: {
+		u16 val = mem->value;
+		if (put_user(val, (u16 __user *) mem->dest))
+			ret = -EFAULT;
+		break;
+	}
+	case 4: {
+		u32 val = mem->value;
+		if (put_user(val, (u32 __user *) mem->dest))
+			ret = -EFAULT;
+		break;
+	}
+	case 8: {
+		u64 val = mem->value;
+		if (put_user(val, (u64 __user *) mem->dest))
+			ret = -EFAULT;
+		break;
+	}
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_complete(req, ret);
+	return 0;
+}
+
 #if defined(CONFIG_NET)
 static bool io_net_retry(struct socket *sock, int flags)
 {
@@ -7494,6 +7582,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_getxattr_prep(req, sqe);
 	case IORING_OP_SOCKET:
 		return io_socket_prep(req, sqe);
+	case IORING_OP_MEMCPY:
+		return io_memcpy_prep(req, sqe);
 	}
 
 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -7815,6 +7905,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 	case IORING_OP_SOCKET:
 		ret = io_socket(req, issue_flags);
 		break;
+	case IORING_OP_MEMCPY:
+		ret = io_memcpy(req);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5fb52bf32435..9e69d70a3b5b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -46,6 +46,7 @@ struct io_uring_sqe {
 		__u32		unlink_flags;
 		__u32		hardlink_flags;
 		__u32		xattr_flags;
+		__u32		memcpy_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -152,6 +153,7 @@ enum {
 	IORING_OP_FGETXATTR,
 	IORING_OP_GETXATTR,
 	IORING_OP_SOCKET,
+	IORING_OP_MEMCPY,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
@@ -206,6 +208,14 @@ enum {
 #define IORING_ASYNC_CANCEL_FD	(1U << 1)
 #define IORING_ASYNC_CANCEL_ANY	(1U << 2)
 
+/*
+ * IORING_OP_MEMCPY flags.
+ *
+ * IORING_MEMCPY_IMM	Immediate copy. 'off' contains an immediate
+ *			value. If not set, 'off' is a source address.
+ */
+#define IORING_MEMCPY_IMM	(1U << 0)
+
 /*
  * IO completion data structure (Completion Queue Entry)
  */

-- 
Jens Axboe