IOPOLL users care about getting completions for the requests they
submitted, not about whether "the device did/completed something".
Currently, io_do_iopoll() may return a positive number, which instructs
io_iopoll_check() to break its loop and end the syscall, even if there
are not enough CQEs, or none at all. Don't return positive numbers, so
io_iopoll_check() exits only when it gets an actual error, needs to
reschedule, or has got enough CQEs.

Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---
 fs/io_uring.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6dca33fdb012..87d9a5d54464 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2276,7 +2276,6 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 	struct io_kiocb *req, *tmp;
 	LIST_HEAD(done);
 	bool spin;
-	int ret;
 
 	/*
 	 * Only spin for completions if we don't have multiple devices hanging
@@ -2284,9 +2283,9 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 	 */
 	spin = !ctx->poll_multi_queue && *nr_events < min;
 
-	ret = 0;
 	list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) {
 		struct kiocb *kiocb = &req->rw.kiocb;
+		int ret;
 
 		/*
 		 * Move completed and retryable entries to our local lists.
@@ -2301,22 +2300,20 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 			break;
 
 		ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin);
-		if (ret < 0)
-			break;
+		if (unlikely(ret < 0))
+			return ret;
+		else if (ret)
+			spin = false;
 
 		/* iopoll may have completed current req */
 		if (READ_ONCE(req->iopoll_completed))
 			list_move_tail(&req->inflight_entry, &done);
-
-		if (ret && spin)
-			spin = false;
-		ret = 0;
 	}
 
 	if (!list_empty(&done))
 		io_iopoll_complete(ctx, nr_events, &done, resubmit);
 
-	return ret;
+	return 0;
 }
 
 /*
-- 
2.32.0
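
To illustrate the control flow this fixes: the consumer loop in
io_iopoll_check() is roughly shaped like the sketch below (a paraphrase
for illustration, not a quote from the tree). The "!ret" in the loop
condition is why a positive return from io_do_iopoll() used to end the
syscall early:

	do {
		/* reap ready requests and post their CQEs;
		 * nr_events counts how many CQEs have been posted */
		ret = io_do_iopoll(ctx, &nr_events, min);
	} while (!ret && nr_events < min && !need_resched());

With io_do_iopoll() now returning 0 on success, this loop terminates
only on a real error, on need_resched(), or once nr_events reaches min,
which is exactly the behaviour the commit message describes.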