There are many small reasons justifying this change.

1. Busy poll must be performed even on rings that have no iopoll and no
   new sqe. It is quite possible for a ring configured for inbound
   traffic with multishot requests to go several hours without receiving
   new request submissions.

2. NAPI busy poll does not perform any credential validation, so it does
   not need to run inside __io_sq_thread()'s credential override section.

3. If the thread is woken up by task work, processing the task work takes
   priority over the NAPI busy loop.

This is why a second loop has been created after the io_sq_tw() call
instead of doing the busy loop in __io_sq_thread() outside its credential
acquisition block.

Signed-off-by: Olivier Langlois <olivier@xxxxxxxxxxxxxx>
---
 io_uring/napi.h   | 9 +++++++++
 io_uring/sqpoll.c | 6 +++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/io_uring/napi.h b/io_uring/napi.h
index 88f1c21d5548..5506c6af1ff5 100644
--- a/io_uring/napi.h
+++ b/io_uring/napi.h
@@ -101,4 +101,13 @@ static inline int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
 }
 #endif /* CONFIG_NET_RX_BUSY_POLL */
 
+static inline int io_do_sqpoll_napi(struct io_ring_ctx *ctx)
+{
+	int ret = 0;
+
+	if (io_napi(ctx))
+		ret = io_napi_sqpoll_busy_poll(ctx);
+	return ret;
+}
+
 #endif
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index cc4a25136030..7f4ed7920a90 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -195,9 +195,6 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 			ret = io_submit_sqes(ctx, to_submit);
 		mutex_unlock(&ctx->uring_lock);
 
-		if (io_napi(ctx))
-			ret += io_napi_sqpoll_busy_poll(ctx);
-
 		if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait))
 			wake_up(&ctx->sqo_sq_wait);
 		if (creds)
@@ -322,6 +319,9 @@ static int io_sq_thread(void *data)
 		if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE))
 			sqt_spin = true;
 
+		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+			io_do_sqpoll_napi(ctx);
+		}
 		if (sqt_spin || !time_after(jiffies, timeout)) {
 			if (sqt_spin) {
 				io_sq_update_worktime(sqd, &start);
-- 
2.46.0
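
For illustration only (not part of the patch), below is a minimal userspace
sketch of the situation described in reason 1: an SQPOLL ring with NAPI busy
polling registered, which can sit for long stretches with no new SQE
submissions. It assumes a liburing version that provides
io_uring_register_napi() (2.6 or newer) and a kernel built with
CONFIG_NET_RX_BUSY_POLL; error handling is abbreviated.

/*
 * Hypothetical example: create an SQPOLL ring and register NAPI busy-poll
 * parameters. With this change, the sqpoll thread busy-polls NAPI for such
 * a ring even while no new requests are being submitted.
 */
#include <liburing.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_params p = { 0 };
	struct io_uring_napi napi = { 0 };
	int ret;

	p.flags = IORING_SETUP_SQPOLL;
	p.sq_thread_idle = 2000;	/* ms before the sqpoll thread sleeps */

	ret = io_uring_queue_init_params(64, &ring, &p);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
		return 1;
	}

	napi.busy_poll_to = 100;	/* NAPI busy-poll timeout, in usecs */
	napi.prefer_busy_poll = 1;
	ret = io_uring_register_napi(&ring, &napi);
	if (ret < 0)
		fprintf(stderr, "register_napi: %s\n", strerror(-ret));

	/*
	 * A real application would now arm multishot receives on its sockets
	 * and mostly wait on CQEs; new SQE submissions may be hours apart.
	 */

	io_uring_queue_exit(&ring);
	return 0;
}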