IOPOLL skips completion locking and synchronises under uring_lock instead, so io_cqring_overflow_flush(), and hence io_cqring_events(), need extra care. Add conditional locking around them.

Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---
 fs/io_uring.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 00dd85acd039..a4deef746bc3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2312,7 +2312,8 @@ static void io_double_put_req(struct io_kiocb *req)
 		io_free_req(req);
 }
 
-static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush)
+static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush,
+				 bool iopoll_lock)
 {
 	if (test_bit(0, &ctx->cq_check_overflow)) {
 		/*
@@ -2323,7 +2324,15 @@ static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush)
 		if (noflush)
 			return -1U;
 
+		/*
+		 * iopoll doesn't care about ctx->completion_lock but uses
+		 * ctx->uring_lock
+		 */
+		if (iopoll_lock)
+			mutex_lock(&ctx->uring_lock);
 		io_cqring_overflow_flush(ctx, false, NULL, NULL);
+		if (iopoll_lock)
+			mutex_unlock(&ctx->uring_lock);
 	}
 
 	/* See comment at the top of this file */
@@ -2550,7 +2559,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 		 * If we do, we can potentially be spinning for commands that
 		 * already triggered a CQE (eg in error).
 		 */
-		if (io_cqring_events(ctx, false))
+		if (io_cqring_events(ctx, false, false))
 			break;
 
 		/*
@@ -7097,7 +7106,7 @@ static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush)
 	 * started waiting. For timeouts, we always want to return to userspace,
 	 * regardless of event count.
 	 */
-	return io_cqring_events(ctx, noflush) >= iowq->to_wait ||
+	return io_cqring_events(ctx, noflush, false) >= iowq->to_wait ||
 			atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
 }
 
@@ -7142,13 +7151,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 		.ctx		= ctx,
 		.to_wait	= min_events,
 	};
+	bool iopoll = ctx->flags & IORING_SETUP_IOPOLL;
 	struct io_rings *rings = ctx->rings;
 	struct timespec64 ts;
 	signed long timeout = 0;
 	int ret = 0;
 
 	do {
-		if (io_cqring_events(ctx, false) >= min_events)
+		if (io_cqring_events(ctx, false, iopoll) >= min_events)
 			return 0;
 		if (!io_run_task_work())
 			break;
@@ -7184,7 +7194,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			continue;
 		else if (ret < 0)
 			break;
-		if (io_should_wake(&iowq, false))
+		/* iopoll ignores completion_lock, so not safe to flush */
+		if (io_should_wake(&iowq, iopoll))
 			break;
 		if (uts) {
 			timeout = schedule_timeout(timeout);
@@ -8623,7 +8634,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	smp_rmb();
 	if (!io_sqring_full(ctx))
 		mask |= EPOLLOUT | EPOLLWRNORM;
-	if (io_cqring_events(ctx, false))
+	if (io_cqring_events(ctx, false, ctx->flags & IORING_SETUP_IOPOLL))
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	return mask;
-- 
2.24.0
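
For illustration only, not part of the patch: below is a minimal userspace sketch of the conditional-locking pattern the change uses, written against pthreads instead of kernel mutexes. All names here (cq_events, flush_overflow, uring_lock, the counters) are made up for this example; the point is only that the flush helper takes the extra lock itself when the caller runs in an IOPOLL-style context that synchronises on a different lock, and skips it otherwise.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t uring_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned overflowed = 3;	/* pretend three completions sit in the overflow list */
static unsigned cq_ready;

static void flush_overflow(void)
{
	/* move "overflowed" completions into the visible CQ */
	cq_ready += overflowed;
	overflowed = 0;
}

static unsigned cq_events(bool noflush, bool iopoll_lock)
{
	if (overflowed) {
		/* caller only wants a hint, don't flush from here */
		if (noflush)
			return -1U;
		/*
		 * IOPOLL-style callers complete under uring_lock rather than
		 * a completion lock, so take it around the flush; other
		 * callers are assumed to already be safe without it.
		 */
		if (iopoll_lock)
			pthread_mutex_lock(&uring_lock);
		flush_overflow();
		if (iopoll_lock)
			pthread_mutex_unlock(&uring_lock);
	}
	return cq_ready;
}

int main(void)
{
	printf("events: %u\n", cq_events(false, true));
	return 0;
}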