Wire up using an io_comp_batch for f_op->iopoll(). If the lower stack
supports it, we can handle high rates of polled IO more efficiently.

This raises the single-core efficiency on my system from ~6.1M IOPS to
~6.6M IOPS running a random read workload at queue depth 128 on two
gen2 Optane drives.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 fs/io_uring.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index fed8ec12a36d..57fb6f13b50b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2428,6 +2428,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 {
 	struct io_wq_work_node *pos, *start, *prev;
 	unsigned int poll_flags = BLK_POLL_NOSLEEP;
+	DEFINE_IO_COMP_BATCH(iob);
 	int nr_events = 0;
 
 	/*
@@ -2450,18 +2451,21 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 		if (READ_ONCE(req->iopoll_completed))
 			break;
 
-		ret = kiocb->ki_filp->f_op->iopoll(kiocb, NULL, poll_flags);
+		ret = kiocb->ki_filp->f_op->iopoll(kiocb, &iob, poll_flags);
 		if (unlikely(ret < 0))
 			return ret;
 		else if (ret)
 			poll_flags |= BLK_POLL_ONESHOT;
 
 		/* iopoll may have completed current req */
-		if (READ_ONCE(req->iopoll_completed))
+		if (!rq_list_empty(iob.req_list) ||
+		    READ_ONCE(req->iopoll_completed))
 			break;
 	}
 
-	if (!pos)
+	if (iob.req_list)
+		iob.complete(&iob);
+	else if (!pos)
 		return 0;
 
 	prev = start;
-- 
2.33.1
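
For context, the batch passed down here only pays off if the driver's
->poll() path actually feeds it. Below is a minimal sketch (not part of
this patch) of how a blk-mq driver can queue completions onto the
io_comp_batch that io_do_iopoll() now hands to f_op->iopoll(), using the
blk_mq_add_to_batch() / blk_mq_end_request_batch() helpers from earlier
in this series. The mydrv_*() names are hypothetical placeholders, not
real driver code:

#include <linux/blk-mq.h>

/* Batch completion callback: ends every request queued on the batch. */
static void mydrv_complete_batch(struct io_comp_batch *iob)
{
	/* any driver-specific per-request unmap/teardown would go here */
	blk_mq_end_request_batch(iob);
}

static int mydrv_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
	struct request *req;
	int found = 0;

	/* mydrv_poll_cq() is a stand-in for reaping the hw completion queue */
	while ((req = mydrv_poll_cq(hctx)) != NULL) {
		found++;
		/*
		 * Defer completion by adding the request to iob->req_list;
		 * if batching isn't possible (no batch supplied, I/O error,
		 * etc.), fall back to completing the request individually.
		 */
		if (!blk_mq_add_to_batch(req, iob, 0, mydrv_complete_batch))
			blk_mq_end_request(req, BLK_STS_OK);
	}
	return found;
}

The caller then flushes everything accumulated on the list in one go via
iob.complete(&iob), which is exactly what the final hunk above adds to
io_do_iopoll().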