On 5/6/22 08:01, Hao Xu wrote:
From: Hao Xu <howeyxu@xxxxxxxxxxx>
For operations like accept, multishot is a useful feature, since we can
reduce a number of accept sqe. Let's integrate it to fast poll, it may
be good for other operations in the future.
Signed-off-by: Hao Xu <howeyxu@xxxxxxxxxxx>
---
fs/io_uring.c | 41 ++++++++++++++++++++++++++---------------
1 file changed, 26 insertions(+), 15 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8ebb1a794e36..d33777575faf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5952,7 +5952,7 @@ static void io_poll_remove_entries(struct io_kiocb *req)
* either spurious wakeup or multishot CQE is served. 0 when it's done with
* the request, then the mask is stored in req->cqe.res.
*/
-static int io_poll_check_events(struct io_kiocb *req, bool locked)
+static int io_poll_check_events(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
int v;
@@ -5981,17 +5981,26 @@ static int io_poll_check_events(struct io_kiocb *req, bool locked)
/* multishot, just fill an CQE and proceed */
if (req->cqe.res && !(req->apoll_events & EPOLLONESHOT)) {
- __poll_t mask = mangle_poll(req->cqe.res & req->apoll_events);
- bool filled;
-
- spin_lock(&ctx->completion_lock);
- filled = io_fill_cqe_aux(ctx, req->cqe.user_data, mask,
- IORING_CQE_F_MORE);
- io_commit_cqring(ctx);
- spin_unlock(&ctx->completion_lock);
- if (unlikely(!filled))
- return -ECANCELED;
- io_cqring_ev_posted(ctx);
+ if (req->flags & REQ_F_APOLL_MULTISHOT) {
+ io_tw_lock(req->ctx, locked);
+ if (likely(!(req->task->flags & PF_EXITING)))
+ io_queue_sqe(req);
That looks dangerous, io_queue_sqe() usually takes the request ownership
and doesn't expect that someone, i.e. io_poll_check_events(), may still be
actively using it.
E.g. io_accept() fails on fd < 0, return an error,
io_queue_sqe() -> io_queue_async() -> io_req_complete_failed()
kills it. Then io_poll_check_events() and polling in general
carry on using the freed request => UAF. Didn't look at it
too carefully, but there might other similar cases.
+ else
+ return -EFAULT;
+ } else {
+ __poll_t mask = mangle_poll(req->cqe.res &
+ req->apoll_events);
+ bool filled;
+
+ spin_lock(&ctx->completion_lock);
+ filled = io_fill_cqe_aux(ctx, req->cqe.user_data,
+ mask, IORING_CQE_F_MORE);
+ io_commit_cqring(ctx);
+ spin_unlock(&ctx->completion_lock);
+ if (unlikely(!filled))
+ return -ECANCELED;
+ io_cqring_ev_posted(ctx);
+ }
} else if (req->cqe.res) {
return 0;
}
@@ -6010,7 +6019,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
struct io_ring_ctx *ctx = req->ctx;
int ret;
- ret = io_poll_check_events(req, *locked);
+ ret = io_poll_check_events(req, locked);
if (ret > 0)
return;
@@ -6035,7 +6044,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
struct io_ring_ctx *ctx = req->ctx;
int ret;
- ret = io_poll_check_events(req, *locked);
+ ret = io_poll_check_events(req, locked);
if (ret > 0)
return;
@@ -6275,7 +6284,7 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
struct io_ring_ctx *ctx = req->ctx;
struct async_poll *apoll;
struct io_poll_table ipt;
- __poll_t mask = EPOLLONESHOT | POLLERR | POLLPRI;
+ __poll_t mask = POLLERR | POLLPRI;
int ret;
if (!def->pollin && !def->pollout)
@@ -6284,6 +6293,8 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
return IO_APOLL_ABORTED;
if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
return IO_APOLL_ABORTED;
+ if (!(req->flags & REQ_F_APOLL_MULTISHOT))
+ mask |= EPOLLONESHOT;
if (def->pollin) {
mask |= POLLIN | POLLRDNORM;
--
Pavel Begunkov