Wire up using an io_batch for f_op->iopoll(). If the lower stack supports
it, we can handle high rates of polled IO more efficiently.

This raises the single core efficiency on my system from ~6.1M IOPS to
~6.6M IOPS running a random read workload at depth 128 on two gen2
Optane drives.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 fs/io_uring.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 082ff64c1bcb..5c031ab8f77f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2390,6 +2390,8 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 {
 	struct io_wq_work_node *pos, *start, *prev;
 	unsigned int poll_flags = BLK_POLL_NOSLEEP;
+	struct file *file = NULL;
+	DEFINE_IO_BATCH(iob);
 	int nr_events = 0;
 
 	/*
@@ -2404,6 +2406,11 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 		struct kiocb *kiocb = &req->rw.kiocb;
 		int ret;
 
+		if (!file)
+			file = kiocb->ki_filp;
+		else if (file != kiocb->ki_filp)
+			break;
+
 		/*
 		 * Move completed and retryable entries to our local lists.
 		 * If we find a request that requires polling, break out
@@ -2412,19 +2419,21 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 		if (READ_ONCE(req->iopoll_completed))
 			break;
 
-		ret = kiocb->ki_filp->f_op->iopoll(kiocb, NULL, poll_flags);
+		ret = kiocb->ki_filp->f_op->iopoll(kiocb, &iob, poll_flags);
 		if (unlikely(ret < 0))
 			return ret;
 		else if (ret)
 			poll_flags |= BLK_POLL_ONESHOT;
 
 		/* iopoll may have completed current req */
-		if (READ_ONCE(req->iopoll_completed))
+		if (iob.req_list || READ_ONCE(req->iopoll_completed))
 			break;
 	}
 
-	if (!pos)
+	if (!pos && !iob.req_list)
 		return 0;
+	if (iob.req_list)
+		iob.complete(&iob);
 
 	prev = start;
 	wq_list_for_each_resume(pos, prev) {
-- 
2.33.0