Re: memory access op ideas

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 4/23/22 11:32 AM, Jens Axboe wrote:
>> I guess copy_to_user saves us from having to consider endianness.
> 
> I was considering that too, definitely something that should be
> investigated. Making it a 1/2/4/8 switch and using put_user() is
> probably a better idea. Easy enough to benchmark.

FWIW, this is the current version. Some quick benchmarking doesn't show
any difference between copy_to_user and put_user, but that may depend on
the arch as well (using aarch64). But we might as well use put_user() and
combine it with the length check, so we explicitly only support 1/2/4/8
sizes.


diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2052a796436c..3b94cb4b67ed 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -586,6 +586,14 @@ struct io_socket {
 	unsigned long			nofile;
 };
 
+struct io_mem {
+	struct file			*file;	/* first member, mirrors io_sync et al. */
+	u64				value;	/* immediate value to store (IORING_MEMCPY_IMM) */
+	void __user			*dest;	/* userspace destination address */
+	u32				len;	/* copy width in bytes; only 1/2/4/8 supported */
+	u32				flags;	/* IORING_MEMCPY_* flags */
+};
+
 struct io_sync {
 	struct file			*file;
 	loff_t				len;
@@ -962,6 +970,7 @@ struct io_kiocb {
 		struct io_msg		msg;
 		struct io_xattr		xattr;
 		struct io_socket	sock;
+		struct io_mem		mem;
 	};
 
 	u8				opcode;
@@ -1231,16 +1240,19 @@ static const struct io_op_def io_op_defs[] = {
 		.needs_file		= 1,
 	},
 	[IORING_OP_FSETXATTR] = {
-		.needs_file = 1
+		.needs_file		= 1,
 	},
 	[IORING_OP_SETXATTR] = {},
 	[IORING_OP_FGETXATTR] = {
-		.needs_file = 1
+		.needs_file		= 1,
 	},
 	[IORING_OP_GETXATTR] = {},
 	[IORING_OP_SOCKET] = {
 		.audit_skip		= 1,
 	},
+	[IORING_OP_MEMCPY] = {
+		.audit_skip		= 1,
+	},
 };
 
 /* requests with any of those set should undergo io_disarm_next() */
@@ -5527,6 +5539,71 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
 	return 0;
 }
 
+static int io_memcpy_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_mem *mem = &req->mem;
+
+	if (unlikely(sqe->ioprio || sqe->buf_index || sqe->splice_fd_in))
+		return -EINVAL;	/* SQE fields unused by this opcode must be zero */
+
+	mem->value = READ_ONCE(sqe->off);	/* immediate value in IMM mode */
+	mem->dest = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	mem->len = READ_ONCE(sqe->len);
+	if (!mem->len || mem->len > sizeof(u64))
+		return -EINVAL;	/* io_memcpy() further restricts len to 1/2/4/8 */
+
+	mem->flags = READ_ONCE(sqe->memcpy_flags);
+	if (mem->flags & ~IORING_MEMCPY_IMM)
+		return -EINVAL;	/* reject unknown flag bits */
+
+	/* only supports immediate mode for now */
+	if (!(mem->flags & IORING_MEMCPY_IMM))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int io_memcpy(struct io_kiocb *req)
+{
+	struct io_mem *mem = &req->mem;
+	int ret = mem->len;	/* on success, CQE res is the copy width */
+
+	switch (mem->len) {	/* put_user() needs the exact access width */
+	case 1: {
+		u8 val = mem->value;
+		if (put_user(val, (u8 __user *) mem->dest))
+			ret = -EFAULT;
+		break;
+		}
+	case 2: {
+		u16 val = mem->value;
+		if (put_user(val, (u16 __user *) mem->dest))
+			ret = -EFAULT;
+		break;
+		}
+	case 4: {
+		u32 val = mem->value;
+		if (put_user(val, (u32 __user *) mem->dest))
+			ret = -EFAULT;
+		break;
+		}
+	case 8: {
+		u64 val = mem->value;
+		if (put_user(val, (u64 __user *) mem->dest))
+			ret = -EFAULT;
+		break;
+		}
+	default:
+		ret = -EINVAL;	/* prep only bounds len to 1..8; reject 3/5/6/7 */
+		break;
+	}
+
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_complete(req, ret);
+	return 0;
+}
+
 #if defined(CONFIG_NET)
 static bool io_net_retry(struct socket *sock, int flags)
 {
@@ -7494,6 +7571,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_getxattr_prep(req, sqe);
 	case IORING_OP_SOCKET:
 		return io_socket_prep(req, sqe);
+	case IORING_OP_MEMCPY:
+		return io_memcpy_prep(req, sqe);
 	}
 
 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -7815,6 +7894,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 	case IORING_OP_SOCKET:
 		ret = io_socket(req, issue_flags);
 		break;
+	case IORING_OP_MEMCPY:
+		ret = io_memcpy(req);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5fb52bf32435..9e69d70a3b5b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -46,6 +46,7 @@ struct io_uring_sqe {
 		__u32		unlink_flags;
 		__u32		hardlink_flags;
 		__u32		xattr_flags;
+		__u32		memcpy_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -152,6 +153,7 @@ enum {
 	IORING_OP_FGETXATTR,
 	IORING_OP_GETXATTR,
 	IORING_OP_SOCKET,
+	IORING_OP_MEMCPY,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
@@ -206,6 +208,14 @@ enum {
 #define IORING_ASYNC_CANCEL_FD	(1U << 1)
 #define IORING_ASYNC_CANCEL_ANY	(1U << 2)
 
+/*
+ * IORING_OP_MEMCPY flags.
+ *
+ * IORING_MEMCPY_IMM		Immediate copy. 'off' contains an immediate
+ *				value. If not set, 'off' is a source address.
+ */
+#define IORING_MEMCPY_IMM	(1U << 0)
+
 /*
  * IO completion data structure (Completion Queue Entry)
  */

-- 
Jens Axboe




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux