Jens Axboe <axboe@xxxxxxxxx> writes: > On 4/25/23 12:18 PM, Stefan Roesch wrote: > > Not too much to complain about, just some minor cleanups that would be > nice to do. > >> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h >> index 1b2a20a42413..2b2ca990ee93 100644 >> --- a/include/linux/io_uring_types.h >> +++ b/include/linux/io_uring_types.h >> @@ -277,6 +278,15 @@ struct io_ring_ctx { >> struct xarray personalities; >> u32 pers_next; >> >> +#ifdef CONFIG_NET_RX_BUSY_POLL >> + struct list_head napi_list; /* track busy poll napi_id */ >> + spinlock_t napi_lock; /* napi_list lock */ >> + >> + DECLARE_HASHTABLE(napi_ht, 4); >> + unsigned int napi_busy_poll_to; /* napi busy poll default timeout */ >> + bool napi_prefer_busy_poll; >> +#endif >> + > > I don't mind overly long lines if it's warranted, for a comment it is > not. This should just go above the variable. > Fixed. I was just following what sq_creds was doing a bit earlier in the file. >> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c >> index efbd6c9c56e5..fff8f84eb560 100644 >> --- a/io_uring/io_uring.c >> +++ b/io_uring/io_uring.c >> iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; >> iowq.timeout = KTIME_MAX; >> >> - if (uts) { >> - struct timespec64 ts; >> + if (!io_napi(ctx)) { >> + if (uts) { >> + struct timespec64 ts; >> >> - if (get_timespec64(&ts, uts)) >> - return -EFAULT; >> - iowq.timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); >> + if (get_timespec64(&ts, uts)) >> + return -EFAULT; >> + iowq.timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); >> + } >> + } else { >> + if (uts) { >> + struct timespec64 ts; >> + >> + if (get_timespec64(&ts, uts)) >> + return -EFAULT; >> + >> + io_napi_adjust_timeout(ctx, &iowq, &ts); >> + iowq.timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); >> + } else { >> + io_napi_adjust_timeout(ctx, &iowq, NULL); >> + } >> + io_napi_busy_loop(ctx, &iowq); >> } > > This is a little 
bit of a mess and has a lot of duplication, that is not > ideal. I'd do something like the end-of-email incremental to avoid that. > Note that it's totally untested... > >> trace_io_uring_cqring_wait(ctx, min_events); >> + >> do { >> unsigned long check_cq; >> > > Spurious line addition here. > Fixed. diff --git a/io_uring/napi.c b/io_uring/napi.c >> new file mode 100644 >> index 000000000000..bb7d2b6b7e90 >> --- /dev/null >> +++ b/io_uring/napi.c >> +static inline void adjust_timeout(unsigned int poll_to, struct timespec64 *ts, >> + unsigned int *new_poll_to) >> +{ >> + struct timespec64 pollto = ns_to_timespec64(1000 * (s64)poll_to); > > There's a bunch of these, but I'll just mention it here - io_uring > always just aligns a second line of arguments with the first one. We > should do that here too. > Fixed. >> + if (timespec64_compare(ts, &pollto) > 0) { >> + *ts = timespec64_sub(*ts, pollto); >> + *new_poll_to = poll_to; >> + } else { >> + u64 to = timespec64_to_ns(ts); >> + >> + do_div(to, 1000); > > Is this going to complain on 32-bit? > My understanding is this should work on 32-bit. >> +static void io_napi_multi_busy_loop(struct list_head *napi_list, >> + struct io_wait_queue *iowq) >> +{ >> + unsigned long start_time = busy_loop_current_time(); >> + >> + do { >> + if (list_is_singular(napi_list)) >> + break; >> + if (!__io_napi_busy_loop(napi_list, iowq->napi_prefer_busy_poll)) >> + break; >> + } while (!io_napi_busy_loop_should_end(iowq, start_time)); >> +} > > Do we need to check for an empty list here? > This function is only called through io_cqring_wait(), io_napi_busy_loop(). In io_cqring_wait() we check that the napi list is not empty. 
>> +static void io_napi_blocking_busy_loop(struct list_head *napi_list, >> + struct io_wait_queue *iowq) >> +{ >> + if (!list_is_singular(napi_list)) >> + io_napi_multi_busy_loop(napi_list, iowq); >> + >> + if (list_is_singular(napi_list)) { >> + struct io_napi_ht_entry *ne; >> + >> + ne = list_first_entry(napi_list, struct io_napi_ht_entry, list); >> + napi_busy_loop(ne->napi_id, io_napi_busy_loop_should_end, iowq, >> + iowq->napi_prefer_busy_poll, BUSY_POLL_BUDGET); >> + } >> +} > > Presumably io_napi_multi_busy_loop() can change the state of the list, > which is why we have if (cond) and then if (!cond) here? Would probably > warrant a comment as it looks a bit confusing. > I added a comment. >> +/* >> + * io_napi_adjust_timeout() - Add napi id to the busy poll list >> + * @ctx: pointer to io-uring context structure >> + * @iowq: pointer to io wait queue >> + * @ts: pointer to timespec or NULL >> + * >> + * Adjust the busy loop timeout according to timespec and busy poll timeout. >> + */ >> +void io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, >> + struct timespec64 *ts) >> +{ >> + if (ts) >> + adjust_timeout(READ_ONCE(ctx->napi_busy_poll_to), ts, >> + &iowq->napi_busy_poll_to); >> + else >> + iowq->napi_busy_poll_to = READ_ONCE(ctx->napi_busy_poll_to); >> +} > > We should probably just pass 'ctx' to adjust_timeout()? Or do > > unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to); > > at the top and then use that for both. Would get rid of that overly long > line too. > > I think it makes sense to combine the two functions. I'll also add a variable at the top of the function like your example above. 
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c > index a4c9a404f631..390f54c546d6 100644 > --- a/io_uring/io_uring.c > +++ b/io_uring/io_uring.c > @@ -2617,29 +2617,17 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, > iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; > iowq.timeout = KTIME_MAX; > > - if (!io_napi(ctx)) { > - if (uts) { > - struct timespec64 ts; > + if (uts) { > + struct timespec64 ts; > > - if (get_timespec64(&ts, uts)) > - return -EFAULT; > - iowq.timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); > - } > - } else { > - if (uts) { > - struct timespec64 ts; > - > - if (get_timespec64(&ts, uts)) > - return -EFAULT; > - > - io_napi_adjust_timeout(ctx, &iowq, &ts); > - iowq.timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); > - } else { > - io_napi_adjust_timeout(ctx, &iowq, NULL); > - } > - io_napi_busy_loop(ctx, &iowq); > + if (get_timespec64(&ts, uts)) > + return -EFAULT; > + iowq.timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); > + io_napi_adjust_timeout(ctx, &iowq, &ts); > } > > + io_napi_busy_loop(ctx, &iowq); > + > trace_io_uring_cqring_wait(ctx, min_events); > > do { > diff --git a/io_uring/napi.c b/io_uring/napi.c > index ca12ff5f5611..3a0d0317ceec 100644 > --- a/io_uring/napi.c > +++ b/io_uring/napi.c > @@ -100,7 +100,8 @@ static bool io_napi_busy_loop_should_end(void *p, unsigned long start_time) > io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to); > } > > -static bool __io_napi_busy_loop(struct list_head *napi_list, bool prefer_busy_poll) > +static bool __io_napi_do_busy_loop(struct list_head *napi_list, > + bool prefer_busy_poll) > { > struct io_napi_ht_entry *e; > struct io_napi_ht_entry *n; > @@ -121,7 +122,7 @@ static void io_napi_multi_busy_loop(struct list_head *napi_list, > do { > if (list_is_singular(napi_list)) > break; > - if (!__io_napi_busy_loop(napi_list, iowq->napi_prefer_busy_poll)) > + if (!__io_napi_do_busy_loop(napi_list, 
iowq->napi_prefer_busy_poll)) > break; > } while (!io_napi_busy_loop_should_end(iowq, start_time)); > } > @@ -251,16 +252,18 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) > } > > /* > - * io_napi_adjust_timeout() - Add napi id to the busy poll list > + * __io_napi_adjust_timeout() - Add napi id to the busy poll list > * @ctx: pointer to io-uring context structure > * @iowq: pointer to io wait queue > * @ts: pointer to timespec or NULL > * > * Adjust the busy loop timeout according to timespec and busy poll timeout. > */ > -void io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, > - struct timespec64 *ts) > +void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, > + struct io_wait_queue *iowq, struct timespec64 *ts) > { > + if (!io_napi(ctx)) > + return; > if (ts) > adjust_timeout(READ_ONCE(ctx->napi_busy_poll_to), ts, > &iowq->napi_busy_poll_to); > @@ -269,13 +272,13 @@ void io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, > } > > /* > - * io_napi_busy_loop() - execute busy poll loop > + * __io_napi_busy_loop() - execute busy poll loop > * @ctx: pointer to io-uring context structure > * @iowq: pointer to io wait queue > * > * Execute the busy poll loop and merge the spliced off list. 
> */ > -void io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) > +void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) > { > iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll); > > @@ -302,8 +305,8 @@ void io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) > */ > int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx) > { > - int ret = 0; > LIST_HEAD(napi_list); > + int ret; > > if (!READ_ONCE(ctx->napi_busy_poll_to)) > return 0; > @@ -312,9 +315,7 @@ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx) > list_splice_init(&ctx->napi_list, &napi_list); > spin_unlock(&ctx->napi_lock); > > - if (__io_napi_busy_loop(&napi_list, ctx->napi_prefer_busy_poll)) > - ret = 1; > - > + ret = __io_napi_do_busy_loop(&napi_list, ctx->napi_prefer_busy_poll); > io_napi_merge_lists(ctx, &napi_list); > return ret; > } > diff --git a/io_uring/napi.h b/io_uring/napi.h > index 8da8f032a441..b5e93b3777c0 100644 > --- a/io_uring/napi.h > +++ b/io_uring/napi.h > @@ -17,9 +17,9 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg); > > void __io_napi_add(struct io_ring_ctx *ctx, struct file *file); > > -void io_napi_adjust_timeout(struct io_ring_ctx *ctx, > +void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, > struct io_wait_queue *iowq, struct timespec64 *ts); > -void io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq); > +void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq); > int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx); > > static inline bool io_napi(struct io_ring_ctx *ctx) > @@ -27,6 +27,23 @@ static inline bool io_napi(struct io_ring_ctx *ctx) > return !list_empty(&ctx->napi_list); > } > > +static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx, > + struct io_wait_queue *iowq, > + struct timespec64 *ts) > +{ > + if (!io_napi(ctx)) > + return; > + __io_napi_adjust_timeout(ctx, iowq, ts); > +} > + > +static inline void 
io_napi_busy_loop(struct io_ring_ctx *ctx, > + struct io_wait_queue *iowq) > +{ > + if (!io_napi(ctx)) > + return; > + __io_napi_busy_loop(ctx, iowq); > +} > + > /* > * io_napi_add() - Add napi id to the busy poll list > * @req: pointer to io_kiocb request > I'll have a look at the above proposal.