The long-term goal is to lay out a framework for offering different
NAPI tracking strategies to the user.

The obvious first alternative strategy is static tracking, where the
user updates the napi_list manually, removing the overhead of io_uring
managing the list dynamically.

Signed-off-by: Olivier Langlois <olivier@xxxxxxxxxxxxxx>
---
 include/linux/io_uring_types.h | 12 +++++-
 io_uring/fdinfo.c              |  4 ++
 io_uring/napi.c                | 76 ++++++++++++++++++++++++++++++----
 io_uring/napi.h                | 11 +----
 4 files changed, 86 insertions(+), 17 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 3315005df117..c1d1b28f8cca 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -217,6 +217,16 @@ struct io_alloc_cache {
         size_t elem_size;
 };
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+struct io_napi_tracking_ops {
+        void (*add_id)(struct io_kiocb *req);
+        bool (*do_busy_loop)(struct io_ring_ctx *ctx,
+                             void *loop_end_arg);
+        void (*show_fdinfo)(struct io_ring_ctx *ctx,
+                            struct seq_file *m);
+};
+#endif
+
 struct io_ring_ctx {
         /* const or read-mostly hot data */
         struct {
@@ -402,11 +412,11 @@ struct io_ring_ctx {
 #ifdef CONFIG_NET_RX_BUSY_POLL
         struct list_head        napi_list;      /* track busy poll napi_id */
         spinlock_t              napi_lock;      /* napi_list lock */
+        struct io_napi_tracking_ops *napi_ops;
 
         /* napi busy poll default timeout */
         ktime_t                 napi_busy_poll_dt;
         bool                    napi_prefer_busy_poll;
-        bool                    napi_enabled;
 
         DECLARE_HASHTABLE(napi_ht, 4);
 #endif
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index b1e0e0d85349..fa773687a684 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -223,5 +223,9 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
         }
         spin_unlock(&ctx->completion_lock);
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+        ctx->napi_ops->show_fdinfo(ctx, m);
+#endif
+
 }
 #endif
diff --git a/io_uring/napi.c b/io_uring/napi.c
index 1de1d4d62925..75ac850af0c0 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -38,7 +38,7 @@ static inline ktime_t net_to_ktime(unsigned long t)
         return ns_to_ktime(t << 10);
 }
 
-void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
+static inline void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
 {
         struct hlist_head *hash_list;
         unsigned int napi_id;
@@ -136,8 +136,52 @@ static bool io_napi_busy_loop_should_end(void *data,
         return false;
 }
 
-static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
-                                   void *loop_end_arg)
+/*
+ * Does not perform any busy polling, but still checks whether list entries
+ * are stale when the list is not empty. The list can be non-empty if NAPI
+ * is unregistered after having been enabled for some time.
+ */
+static bool no_tracking_do_busy_loop(struct io_ring_ctx *ctx,
+                                     void *loop_end_arg)
+{
+        struct io_napi_entry *e;
+        bool is_stale = false;
+
+        list_for_each_entry_rcu(e, &ctx->napi_list, list) {
+                if (time_after(jiffies, e->timeout))
+                        is_stale = true;
+        }
+
+        return is_stale;
+}
+
+static void no_tracking_show_fdinfo(struct io_ring_ctx *ctx,
+                                    struct seq_file *m)
+{
+        seq_puts(m, "NAPI:\tdisabled\n");
+}
+
+/*
+ * Default ops for a newly created ring on which NAPI busy poll is not enabled.
+ */
+static struct io_napi_tracking_ops no_tracking_ops = {
+        .add_id = NULL,
+        .do_busy_loop = no_tracking_do_busy_loop,
+        .show_fdinfo = no_tracking_show_fdinfo,
+};
+
+static void dynamic_tracking_add_id(struct io_kiocb *req)
+{
+        struct io_ring_ctx *ctx = req->ctx;
+        struct socket *sock;
+
+        sock = sock_from_file(req->file);
+        if (sock)
+                __io_napi_add(ctx, sock);
+}
+
+static bool dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx,
+                                          void *loop_end_arg)
 {
         struct io_napi_entry *e;
         bool (*loop_end)(void *, unsigned long) = NULL;
@@ -157,6 +201,23 @@ static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
         return is_stale;
 }
 
+static void dynamic_tracking_show_fdinfo(struct io_ring_ctx *ctx,
+                                         struct seq_file *m)
+{
+        seq_puts(m, "NAPI:\tenabled\n");
+        seq_printf(m, "napi_busy_poll_dt:\t%llu\n", ctx->napi_busy_poll_dt);
+        if (ctx->napi_prefer_busy_poll)
+                seq_puts(m, "napi_prefer_busy_poll:\ttrue\n");
+        else
+                seq_puts(m, "napi_prefer_busy_poll:\tfalse\n");
+}
+
+static struct io_napi_tracking_ops dynamic_tracking_ops = {
+        .add_id = dynamic_tracking_add_id,
+        .do_busy_loop = dynamic_tracking_do_busy_loop,
+        .show_fdinfo = dynamic_tracking_show_fdinfo,
+};
+
 static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
                                        struct io_wait_queue *iowq)
 {
@@ -172,7 +233,7 @@ static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
 
         rcu_read_lock();
         do {
-                is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg);
+                is_stale = ctx->napi_ops->do_busy_loop(ctx, loop_end_arg);
         } while (!io_napi_busy_loop_should_end(iowq, start_time) &&
                  !loop_end_arg);
         rcu_read_unlock();
@@ -193,6 +254,7 @@ void io_napi_init(struct io_ring_ctx *ctx)
         spin_lock_init(&ctx->napi_lock);
         ctx->napi_prefer_busy_poll = false;
         ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
+        ctx->napi_ops = &no_tracking_ops;
 }
 
 /*
@@ -241,7 +303,7 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
 
         WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
         WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
-        WRITE_ONCE(ctx->napi_enabled, true);
+        WRITE_ONCE(ctx->napi_ops, &dynamic_tracking_ops);
 
         return 0;
 }
@@ -265,7 +327,7 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
 
         WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
         WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
-        WRITE_ONCE(ctx->napi_enabled, false);
+        WRITE_ONCE(ctx->napi_ops, &no_tracking_ops);
 
         return 0;
 }
@@ -321,7 +383,7 @@ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
                 return 0;
 
         rcu_read_lock();
-        is_stale = __io_napi_do_busy_loop(ctx, NULL);
+        is_stale = ctx->napi_ops->do_busy_loop(ctx, NULL);
         rcu_read_unlock();
 
         io_napi_remove_stale(ctx, is_stale);
diff --git a/io_uring/napi.h b/io_uring/napi.h
index 27b88c3eb428..3d68d8e7b108 100644
--- a/io_uring/napi.h
+++ b/io_uring/napi.h
@@ -15,8 +15,6 @@ void io_napi_free(struct io_ring_ctx *ctx);
 int io_register_napi(struct io_ring_ctx *ctx, void __user *arg);
 int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg);
 
-void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock);
-
 void __io_napi_adjust_timeout(struct io_ring_ctx *ctx,
                               struct io_wait_queue *iowq, ktime_t to_wait);
 void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq);
@@ -53,14 +51,9 @@ static inline void io_napi_busy_loop(struct io_ring_ctx *ctx,
 static inline void io_napi_add(struct io_kiocb *req)
 {
         struct io_ring_ctx *ctx = req->ctx;
-        struct socket *sock;
-
-        if (!READ_ONCE(ctx->napi_enabled))
-                return;
 
-        sock = sock_from_file(req->file);
-        if (sock)
-                __io_napi_add(ctx, sock);
+        if (ctx->napi_ops->add_id)
+                ctx->napi_ops->add_id(req);
 }
 
 #else
-- 
2.46.0
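
To give an idea of where the framework is heading, below is a sketch, not
part of this patch, of what the static tracking strategy mentioned in the
commit message could look like once an interface exists for the user to
fill the napi_list manually. All static_tracking_* names are hypothetical;
the busy loop callback can be shared with dynamic tracking since both
strategies walk the same ctx->napi_list:

/*
 * Hypothetical static tracking strategy: the application registers napi
 * ids itself, so add_id stays NULL and io_napi_add() costs nothing on the
 * I/O path; busy polling walks ctx->napi_list exactly as in dynamic
 * tracking.
 */
static void static_tracking_show_fdinfo(struct io_ring_ctx *ctx,
                                        struct seq_file *m)
{
        seq_puts(m, "NAPI:\tenabled\n");
        seq_puts(m, "napi tracking:\tstatic\n");
}

static struct io_napi_tracking_ops static_tracking_ops = {
        .add_id = NULL,         /* the user maintains napi_list */
        .do_busy_loop = dynamic_tracking_do_busy_loop,
        .show_fdinfo = static_tracking_show_fdinfo,
};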
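
For completeness, the strategy switch is driven from userspace by the
existing NAPI register/unregister opcodes: io_register_napi() installs
dynamic_tracking_ops and io_unregister_napi() restores no_tracking_ops.
A minimal usage sketch, assuming liburing 2.6+ and a kernel that supports
IORING_REGISTER_NAPI:

#include <stdio.h>
#include <string.h>
#include <liburing.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_napi napi;
        int ret;

        ret = io_uring_queue_init(8, &ring, 0);
        if (ret < 0) {
                fprintf(stderr, "queue_init: %s\n", strerror(-ret));
                return 1;
        }

        memset(&napi, 0, sizeof(napi));
        napi.busy_poll_to = 100;        /* busy poll timeout, microseconds */
        napi.prefer_busy_poll = 1;

        /* switches the ring to dynamic_tracking_ops */
        ret = io_uring_register_napi(&ring, &napi);
        if (ret < 0)
                fprintf(stderr, "register_napi: %s\n", strerror(-ret));

        /* ... submit socket I/O; cqe waits now busy poll tracked ids ... */

        /* switches the ring back to no_tracking_ops */
        io_uring_unregister_napi(&ring, &napi);
        io_uring_queue_exit(&ring);
        return 0;
}

While registered, /proc/<pid>/fdinfo/<ring fd> shows the "NAPI:\tenabled"
output produced by dynamic_tracking_show_fdinfo().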