First of all don't spin in io_ring_ctx_wait_and_kill() on iopoll. Requests won't complete faster because of that, but only lengthen io_uring_release(). The same goes for offloaded cleanup in io_ring_exit_work() -- it already has waiting loop, don't do blocking active spinning. For that, pass min=0 into io_iopoll_[try_]reap_events(), so it won't actively spin. Leave the function if io_do_iopoll() there can't complete a request to sleep in io_ring_exit_work(). Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx> --- fs/io_uring.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 9c3f9ccb850d..bcf6bf799a61 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2048,7 +2048,7 @@ static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events, * We can't just wait for polled events to come to us, we have to actively * find and complete them. */ -static void io_iopoll_reap_events(struct io_ring_ctx *ctx) +static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) { if (!(ctx->flags & IORING_SETUP_IOPOLL)) return; @@ -2057,8 +2057,11 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx) while (!list_empty(&ctx->poll_list)) { unsigned int nr_events = 0; - io_do_iopoll(ctx, &nr_events, 1); + io_do_iopoll(ctx, &nr_events, 0); + /* let it sleep and repeat later if can't complete a request */ + if (nr_events == 0) + break; /* * Ensure we allow local-to-the-cpu processing to take place, * in this case we need to ensure that we reap all events. @@ -7634,7 +7637,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) ctx->sqo_mm = NULL; } - io_iopoll_reap_events(ctx); io_sqe_buffer_unregister(ctx); io_sqe_files_unregister(ctx); io_eventfd_unregister(ctx); @@ -7701,11 +7703,8 @@ static int io_remove_personalities(int id, void *p, void *data) static void io_ring_exit_work(struct work_struct *work) { - struct io_ring_ctx *ctx; - - ctx = container_of(work, struct io_ring_ctx, exit_work); - if (ctx->rings) - io_cqring_overflow_flush(ctx, true); + struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, + exit_work); /* * If we're doing polled IO and end up having requests being @@ -7713,11 +7712,11 @@ static void io_ring_exit_work(struct work_struct *work) * we're waiting for refs to drop. We need to reap these manually, * as nobody else will be looking for them. */ - while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)) { - io_iopoll_reap_events(ctx); + do { if (ctx->rings) io_cqring_overflow_flush(ctx, true); - } + io_iopoll_try_reap_events(ctx); + } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)); io_ring_ctx_free(ctx); } @@ -7733,10 +7732,10 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) if (ctx->io_wq) io_wq_cancel_all(ctx->io_wq); - io_iopoll_reap_events(ctx); /* if we failed setting up the ctx, we might not have any rings */ if (ctx->rings) io_cqring_overflow_flush(ctx, true); + io_iopoll_try_reap_events(ctx); idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx); INIT_WORK(&ctx->exit_work, io_ring_exit_work); queue_work(system_wq, &ctx->exit_work); -- 2.24.0