Re: [PATCH 3/5] io_uring: let fast poll support multishot

On 5/6/22 08:01, Hao Xu wrote:
From: Hao Xu <howeyxu@xxxxxxxxxxx>

For operations like accept, multishot is a useful feature, since it lets us
reduce the number of accept SQEs that have to be submitted. Let's integrate it
with fast poll; it may be useful for other operations in the future.

Signed-off-by: Hao Xu <howeyxu@xxxxxxxxxxx>
---
  fs/io_uring.c | 41 ++++++++++++++++++++++++++---------------
  1 file changed, 26 insertions(+), 15 deletions(-)
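
For context, the userspace side of this would look roughly like the sketch
below: one SQE is armed once and the kernel keeps producing completions. The
helper name io_uring_prep_multishot_accept() is assumed here purely for
illustration; this patch only covers the in-kernel poll side and does not
define any particular userspace API.

/* Sketch only: assumes a liburing helper io_uring_prep_multishot_accept(). */
#include <errno.h>
#include <liburing.h>

static int arm_multishot_accept(struct io_uring *ring, int listen_fd)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	if (!sqe)
		return -EBUSY;	/* SQ ring full, submit and retry */
	/* One SQE; the kernel posts one CQE per accepted connection. */
	io_uring_prep_multishot_accept(sqe, listen_fd, NULL, NULL, 0);
	return io_uring_submit(ring);
}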

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8ebb1a794e36..d33777575faf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5952,7 +5952,7 @@ static void io_poll_remove_entries(struct io_kiocb *req)
   * either spurious wakeup or multishot CQE is served. 0 when it's done with
   * the request, then the mask is stored in req->cqe.res.
   */
-static int io_poll_check_events(struct io_kiocb *req, bool locked)
+static int io_poll_check_events(struct io_kiocb *req, bool *locked)
  {
  	struct io_ring_ctx *ctx = req->ctx;
  	int v;
@@ -5981,17 +5981,26 @@ static int io_poll_check_events(struct io_kiocb *req, bool locked)
  		/* multishot, just fill an CQE and proceed */
  		if (req->cqe.res && !(req->apoll_events & EPOLLONESHOT)) {
-			__poll_t mask = mangle_poll(req->cqe.res & req->apoll_events);
-			bool filled;
-
-			spin_lock(&ctx->completion_lock);
-			filled = io_fill_cqe_aux(ctx, req->cqe.user_data, mask,
-						 IORING_CQE_F_MORE);
-			io_commit_cqring(ctx);
-			spin_unlock(&ctx->completion_lock);
-			if (unlikely(!filled))
-				return -ECANCELED;
-			io_cqring_ev_posted(ctx);
+			if (req->flags & REQ_F_APOLL_MULTISHOT) {
+				io_tw_lock(req->ctx, locked);
+				if (likely(!(req->task->flags & PF_EXITING)))
+					io_queue_sqe(req);

That looks dangerous: io_queue_sqe() usually takes ownership of the request
and doesn't expect that someone, i.e. io_poll_check_events(), may still be
actively using it.

E.g. io_accept() fails on fd < 0 and returns an error, then
io_queue_sqe() -> io_queue_async() -> io_req_complete_failed()
kills the request. io_poll_check_events() and polling in general
then carry on using the freed request => UAF. I didn't look at it
too carefully, but there might be other similar cases.
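
To make the ownership rule concrete, here is a minimal sketch with made-up
names (queue_request(), check_events()); it is not the actual io_uring call
chain, just the shape of the bug:

/* Illustration only: invented names, not io_uring code.  Once a callee
 * may free the object, the caller must not touch it afterwards. */
#include <stdlib.h>

struct request {
	int res;
	int refs;
};

/* Takes ownership: on failure it completes and frees the request. */
static int queue_request(struct request *req)
{
	if (req->res < 0) {
		free(req);		/* request is gone after this */
		return -1;
	}
	return 0;
}

static void check_events(struct request *req)
{
	queue_request(req);
	/* BUG: req may already have been freed above, so this is a
	 * use-after-free, the same shape as io_poll_check_events()
	 * continuing to use the request after io_queue_sqe() failed it. */
	req->refs--;
}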


+				else
+					return -EFAULT;
+			} else {
+				__poll_t mask = mangle_poll(req->cqe.res &
+							    req->apoll_events);
+				bool filled;
+
+				spin_lock(&ctx->completion_lock);
+				filled = io_fill_cqe_aux(ctx, req->cqe.user_data,
+							 mask, IORING_CQE_F_MORE);
+				io_commit_cqring(ctx);
+				spin_unlock(&ctx->completion_lock);
+				if (unlikely(!filled))
+					return -ECANCELED;
+				io_cqring_ev_posted(ctx);
+			}
  		} else if (req->cqe.res) {
  			return 0;
  		}
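
As an aside, each hit of this branch shows up to userspace as a CQE with
IORING_CQE_F_MORE set; when the flag is missing the multishot request has
terminated and has to be re-armed. A rough consumer loop, again assuming
liburing helpers and a hypothetical application callback:

/* Sketch: drain multishot CQEs until the kernel stops the stream. */
#include <liburing.h>

/* Hypothetical application callback, not part of io_uring. */
extern void handle_new_connection(int fd);

static int reap_accepts(struct io_uring *ring)
{
	for (;;) {
		struct io_uring_cqe *cqe;
		unsigned flags;
		int res, ret = io_uring_wait_cqe(ring, &cqe);

		if (ret < 0)
			return ret;
		res = cqe->res;
		flags = cqe->flags;
		io_uring_cqe_seen(ring, cqe);
		if (res >= 0)
			handle_new_connection(res);	/* res is the accepted fd */
		/* IORING_CQE_F_MORE cleared: the request completed for good
		 * and needs re-arming with a new SQE. */
		if (!(flags & IORING_CQE_F_MORE))
			return res < 0 ? res : 0;
	}
}
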
@@ -6010,7 +6019,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
  	struct io_ring_ctx *ctx = req->ctx;
  	int ret;
-	ret = io_poll_check_events(req, *locked);
+	ret = io_poll_check_events(req, locked);
  	if (ret > 0)
  		return;
@@ -6035,7 +6044,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
  	struct io_ring_ctx *ctx = req->ctx;
  	int ret;
-	ret = io_poll_check_events(req, *locked);
+	ret = io_poll_check_events(req, locked);
  	if (ret > 0)
  		return;
@@ -6275,7 +6284,7 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
  	struct io_ring_ctx *ctx = req->ctx;
  	struct async_poll *apoll;
  	struct io_poll_table ipt;
-	__poll_t mask = EPOLLONESHOT | POLLERR | POLLPRI;
+	__poll_t mask = POLLERR | POLLPRI;
  	int ret;
  	if (!def->pollin && !def->pollout)
@@ -6284,6 +6293,8 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
  		return IO_APOLL_ABORTED;
  	if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
  		return IO_APOLL_ABORTED;
+	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
+		mask |= EPOLLONESHOT;
  	if (def->pollin) {
  		mask |= POLLIN | POLLRDNORM;

--
Pavel Begunkov


