Re: [RFC] io_uring: stop only support read/write for ctx with IORING_SETUP_IOPOLL

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 2019/11/4 18:09, Bob Liu wrote:
On 11/4/19 4:56 PM, yangerkun wrote:
There is no problem to support other type request for the ctx with
IORING_SETUP_IOPOLL.

Could you describe the benefit of doing this?

Hi,

I am trying to replace libaio with io_uring in InnoDB/MariaDB(which
build on xfs/nvme). And in order to simulate the timeout mechanism
like io_getevents, firstly, I selected polling on io_uring's fd, and it really did work. But while trying to enable
IORING_SETUP_IOPOLL since xfs has iopoll function interface, the
mechanism will fail since io_uring_poll compares cq.head against
cached_cq_tail, which will not be updated until we call io_uring_enter and
do the poll. So, instead, I decided to use timeout requests in
io_uring, but they will return -EINVAL since we enable IORING_SETUP_IOPOLL
at the same time. I think this combination is a normal use case, as are
the other combinations described in this patch. I am not sure whether this is a
good solution for the problem, and maybe there exists some better way.

Thanks,
Kun.




What we should do is just insert read/write
requests into the poll list, and protect cqes with completion_lock in
io_iopoll_complete, since other requests can now be completed at the same
time while we do io_iopoll_complete.

Signed-off-by: yangerkun <yangerkun@xxxxxxxxxx>
---
  fs/io_uring.c | 29 +++++++++--------------------
  1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f9a38998f2fc..8bf52d9a2c55 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -753,9 +753,11 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
  {
  	void *reqs[IO_IOPOLL_BATCH];
  	struct io_kiocb *req;
+	unsigned long flags;
  	int to_free;
to_free = 0;
+	spin_lock_irqsave(&ctx->completion_lock, flags);
  	while (!list_empty(done)) {
  		req = list_first_entry(done, struct io_kiocb, list);
  		list_del(&req->list);
@@ -781,6 +783,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
  	}
io_commit_cqring(ctx);
+	spin_unlock_irqrestore(&ctx->completion_lock, flags);
  	io_free_req_many(ctx, reqs, &to_free);
  }
@@ -1517,9 +1520,6 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
  	struct io_ring_ctx *ctx = req->ctx;
  	long err = 0;
- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-
  	io_cqring_add_event(ctx, user_data, err);
  	io_put_req(req);
  	return 0;
@@ -1527,13 +1527,9 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  {
-	struct io_ring_ctx *ctx = req->ctx;
-
  	if (!req->file)
  		return -EBADF;
- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
  	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
  		return -EINVAL;
@@ -1574,14 +1570,11 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe, static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  {
-	struct io_ring_ctx *ctx = req->ctx;
  	int ret = 0;
if (!req->file)
  		return -EBADF;
- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
  	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
  		return -EINVAL;
@@ -1627,9 +1620,6 @@ static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
  	struct socket *sock;
  	int ret;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
-
  	sock = sock_from_file(req->file, &ret);
  	if (sock) {
  		struct user_msghdr __user *msg;
@@ -1712,8 +1702,6 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  	struct io_kiocb *poll_req, *next;
  	int ret = -ENOENT;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
  	if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
  	    sqe->poll_events)
  		return -EINVAL;
@@ -1833,8 +1821,6 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  	__poll_t mask;
  	u16 events;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
  	if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
  		return -EINVAL;
  	if (!poll->file)
@@ -1932,8 +1918,6 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  	struct timespec64 ts;
  	unsigned span = 0;
- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
-		return -EINVAL;
  	if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->timeout_flags ||
  	    sqe->len != 1)
  		return -EINVAL;
@@ -2032,6 +2016,7 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
  			   const struct sqe_submit *s, bool force_nonblock)
  {
  	int ret, opcode;
+	bool poll = false;
req->user_data = READ_ONCE(s->sqe->user_data); @@ -2046,17 +2031,21 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
  	case IORING_OP_READV:
  		if (unlikely(s->sqe->buf_index))
  			return -EINVAL;
+		poll = true;
  		ret = io_read(req, s, force_nonblock);
  		break;
  	case IORING_OP_WRITEV:
  		if (unlikely(s->sqe->buf_index))
  			return -EINVAL;
+		poll = true;
  		ret = io_write(req, s, force_nonblock);
  		break;
  	case IORING_OP_READ_FIXED:
+		poll = true;
  		ret = io_read(req, s, force_nonblock);
  		break;
  	case IORING_OP_WRITE_FIXED:
+		poll = true;
  		ret = io_write(req, s, force_nonblock);
  		break;
  	case IORING_OP_FSYNC:
@@ -2088,7 +2077,7 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
  	if (ret)
  		return ret;
- if (ctx->flags & IORING_SETUP_IOPOLL) {
+	if ((ctx->flags & IORING_SETUP_IOPOLL) && poll) {
  		if (req->result == -EAGAIN)
  			return -EAGAIN;


.





[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux