On 5/2/23 10:53 AM, Stefan Roesch wrote: > diff --git a/io_uring/napi.c b/io_uring/napi.c > new file mode 100644 > index 000000000000..76bcabeb7ef0 > --- /dev/null > +++ b/io_uring/napi.c > +static bool io_napi_busy_loop_should_end(void *data, > + unsigned long start_time) > +{ > + struct io_wait_queue *iowq = data; > + > + if (signal_pending(current)) > + return true; > + if (io_should_wake(iowq)) > + return true; > + if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to)) > + return true; > + > + return false; > +} > + > +static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx, > + void *loop_end_arg) > +{ > + struct io_napi_entry *e; > + bool (*loop_end)(void *, unsigned long) = NULL; > + bool is_stale = false; > + > + if (loop_end_arg) > + loop_end = io_napi_busy_loop_should_end; > + > + list_for_each_entry_rcu(e, &ctx->napi_list, list) { > + napi_busy_loop(e->napi_id, loop_end, loop_end_arg, > + ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET); > + > + if (time_after(jiffies, e->timeout)) > + is_stale = true; > + } > + > + return is_stale; > +} > + > +static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx, > + struct io_wait_queue *iowq) > +{ > + unsigned long start_time = busy_loop_current_time(); > + void *loop_end_arg = NULL; > + bool is_stale = false; > + > + /* Singular lists use a different napi loop end check function and are > + * only executed once. > + */ > + if (list_is_singular(&ctx->napi_list)) > + loop_end_arg = iowq; > + > + rcu_read_lock(); > + do { > + is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg); > + } while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg); > + rcu_read_unlock(); > + > + io_napi_remove_stale(ctx, is_stale); > +} I mentioned this in our out-of-band discussions on this patch set, and we cannot call napi_busy_loop() under rcu_read_lock() if loop_end and loop_end_arg are set AND loop_end() doesn't always return true. 
Because otherwise we can end up with napi_busy_loop() doing: if (unlikely(need_resched())) { if (napi_poll) busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget); preempt_enable(); rcu_read_unlock(); cond_resched(); if (loop_end(loop_end_arg, start_time)) return; goto restart; } and hence we're now scheduling while the caller's rcu read lock is still held (napi_busy_loop() only drops its own inner one). So we need to handle that case appropriately as well. -- Jens Axboe