In the spirit of fairness, cap the max number of SQ entries we'll submit for SQPOLL if we have multiple rings. If we don't do that, we could be submitting tons of entries for one ring, while others are waiting to be serviced. The value of 8 is somewhat arbitrarily chosen as something that allows a fair bit of batching, without using excessive time per ring. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- fs/io_uring.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e21a7a9c6a59..ef86aa2a577d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6687,7 +6687,7 @@ enum sq_ret { }; static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx, - unsigned long start_jiffies) + unsigned long start_jiffies, bool cap_entries) { unsigned long timeout = start_jiffies + ctx->sq_thread_idle; struct io_sq_data *sqd = ctx->sq_data; @@ -6755,6 +6755,10 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx, io_ring_clear_wakeup_flag(ctx); } + /* if we're handling multiple rings, cap submit size for fairness */ + if (cap_entries && to_submit > 8) + to_submit = 8; + mutex_lock(&ctx->uring_lock); if (likely(!percpu_ref_is_dying(&ctx->refs))) ret = io_submit_sqes(ctx, to_submit, ctx->ring_file, ctx->ring_fd); @@ -6789,6 +6793,7 @@ static int io_sq_thread(void *data) start_jiffies = jiffies; while (!kthread_should_stop()) { enum sq_ret ret = 0; + bool cap_entries; /* * Any changes to the sqd lists are synchronized through the @@ -6801,6 +6806,8 @@ static int io_sq_thread(void *data) if (unlikely(!list_empty(&sqd->ctx_new_list))) io_sqd_init_new(sqd); + cap_entries = !list_is_singular(&sqd->ctx_list); + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { if (current->cred != ctx->creds) { if (old_cred) @@ -6814,7 +6821,7 @@ static int io_sq_thread(void *data) task_unlock(current); } - ret |= __io_sq_thread(ctx, start_jiffies); + ret |= __io_sq_thread(ctx, start_jiffies, cap_entries); io_sq_thread_drop_mm(); } -- 
2.28.0 -- Jens Axboe