Re: memory access op ideas

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 22/04/2022 18.03, Jens Axboe wrote:
On 4/22/22 8:50 AM, Jens Axboe wrote:
On 4/13/22 4:33 AM, Avi Kivity wrote:
Unfortunately, only ideas, no patches. But at least the first seems very easy.


- IORING_OP_MEMCPY_IMMEDIATE - copy some payload included in the op
itself (1-8 bytes) to a user memory location specified by the op.


Linked to another op, this can generate an in-memory notification
useful for busy-waiters or the UMWAIT instruction.

This would be useful for Seastar, which looks at a timer-managed
memory location to check when to break computation loops.
This one would indeed be trivial to do. If we limit the max size
supported to eg 8 bytes like suggested, then it could be in the sqe
itself and just copied to the user address specified.

Eg have sqe->len be the length (1..8 bytes), sqe->addr the destination
address, and sqe->off the data to copy.

If you'll commit to testing this, I can hack it up pretty quickly...
Something like this, totally untested. Maybe the return value should be
bytes copied?


Yes. It could be less than what the user expected, unless we enforce alignment (perhaps we should).


Just returns 0/error right now.

Follows the above convention.


diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2052a796436c..d2a95f9d9d2d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -586,6 +586,13 @@ struct io_socket {
  	unsigned long			nofile;
  };
+struct io_mem {
+	struct file			*file;
+	u64				value;
+	void __user			*dest;
+	u32				len;
+};
+


  struct io_sync {
  	struct file			*file;
  	loff_t				len;
@@ -962,6 +969,7 @@ struct io_kiocb {
  		struct io_msg		msg;
  		struct io_xattr		xattr;
  		struct io_socket	sock;
+		struct io_mem		mem;
  	};
u8 opcode;
@@ -1231,16 +1239,19 @@ static const struct io_op_def io_op_defs[] = {
  		.needs_file		= 1,
  	},
  	[IORING_OP_FSETXATTR] = {
-		.needs_file = 1
+		.needs_file		= 1,
  	},
  	[IORING_OP_SETXATTR] = {},
  	[IORING_OP_FGETXATTR] = {
-		.needs_file = 1
+		.needs_file		= 1,
  	},
  	[IORING_OP_GETXATTR] = {},
  	[IORING_OP_SOCKET] = {
  		.audit_skip		= 1,
  	},
+	[IORING_OP_MEMCPY_IMM] = {
+		.audit_skip		= 1,
+	},
  };
/* requests with any of those set should undergo io_disarm_next() */
@@ -5527,6 +5538,38 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
  	return 0;
  }
+static int io_memcpy_imm_prep(struct io_kiocb *req,
+			      const struct io_uring_sqe *sqe)
+{
+	struct io_mem *mem = &req->mem;
+
+	if (unlikely(sqe->ioprio || sqe->rw_flags || sqe->buf_index ||
+		     sqe->splice_fd_in))
+		return -EINVAL;
+
+	mem->value = READ_ONCE(sqe->off);
+	mem->dest = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	mem->len = READ_ONCE(sqe->len);
+	if (!mem->len || mem->len > sizeof(u64))
+		return -EINVAL;
+


I'd also check that the length is a power-of-two to avoid having to deal with weird sizes if we later find it inconvenient.


+	return 0;
+}
+
+static int io_memcpy_imm(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_mem *mem = &req->mem;
+	int ret = 0;
+
+	if (copy_to_user(mem->dest, &mem->value, mem->len))
+		ret = -EFAULT;
+


Is copy_to_user efficient for tiny sizes? Or is it better to use a switch and put_user()?


I guess copy_to_user saves us from having to consider endianness.


+	if (ret < 0)
+		req_set_fail(req);
+	io_req_complete(req, ret);
+	return 0;
+}
+
  #if defined(CONFIG_NET)
  static bool io_net_retry(struct socket *sock, int flags)
  {
@@ -7494,6 +7537,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  		return io_getxattr_prep(req, sqe);
  	case IORING_OP_SOCKET:
  		return io_socket_prep(req, sqe);
+	case IORING_OP_MEMCPY_IMM:
+		return io_memcpy_imm_prep(req, sqe);
  	}
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -7815,6 +7860,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
  	case IORING_OP_SOCKET:
  		ret = io_socket(req, issue_flags);
  		break;
+	case IORING_OP_MEMCPY_IMM:
+		ret = io_memcpy_imm(req, issue_flags);
+		break;
  	default:
  		ret = -EINVAL;
  		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5fb52bf32435..853f00a2bddd 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -152,6 +152,7 @@ enum {
  	IORING_OP_FGETXATTR,
  	IORING_OP_GETXATTR,
  	IORING_OP_SOCKET,
+	IORING_OP_MEMCPY_IMM,
/* this goes last, obviously */
  	IORING_OP_LAST,




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux