Re: napi_busy_poll

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



在 2022/2/16 上午2:05, Olivier Langlois 写道:
On Tue, 2022-02-15 at 03:37 -0500, Olivier Langlois wrote:

That being said, I have not been able to make it work yet. For some
unknown reasons, no valid napi_id is extracted from the sockets added
to the context so the net_busy_poll function is never called.

I find that very strange since prior to use io_uring, my code was
using
epoll and the busy polling was working fine with my application
sockets. Something is escaping my comprehension. I must be tired and
this
will become obvious...

The napi_id values associated with my sockets appear to be in the range
0 < napi_id < MIN_NAPI_ID

from busy_loop.h:
/*		0 - Reserved to indicate value not set
  *     1..NR_CPUS - Reserved for sender_cpu
  *  NR_CPUS+1..~0 - Region available for NAPI IDs
  */
#define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1))

I have found this:
https://lwn.net/Articles/619862/

hinting that busy_poll may be incompatible with RPS
(Documentation/networking/scaling.rst) that I may have discovered
*AFTER* my epoll -> io_uring transition (I don't recall exactly the
sequence of my learning process).

With my current knowledge, it makes little sense why busy polling would
not be possible with RPS. Also, what exactly is a NAPI device is quite
nebulous to me... Looking into the Intel igb driver code, it seems like
1 NAPI device is created for each interrupt vector/Rx buffer of the
device.

Bottom line, it seems like I have fallen into a new rabbit hole. It may
take me a day or 2 to figure it all out... you are welcome to enlighten
me if you know a thing or 2 about those topics... I am kinda lost right
now...

Hi Olivier,
I've written something to express my idea; it would be great if you can
try it.
It's totally untested and only does polling in the sqthread; it won't be
hard to expand it to cqring_wait. My original idea was to poll all the
napi devices, but that seems like it may not be efficient, so for a
request, we just do napi polling for one napi.
There is still one problem: when to delete the polled NAPIs.

Regards,
Hao

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 538f90bd0508..2e32d5fe0641 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -63,6 +63,7 @@
 #include <net/sock.h>
 #include <net/af_unix.h>
 #include <net/scm.h>
+#include <net/busy_poll.h>
 #include <linux/anon_inodes.h>
 #include <linux/sched/mm.h>
 #include <linux/uaccess.h>
@@ -443,6 +444,7 @@ struct io_ring_ctx {
                spinlock_t                      rsrc_ref_lock;
        };

+       struct list_head                napi_list;
        /* Keep this last, we don't need it for the fast path */
        struct {
                #if defined(CONFIG_UNIX)
@@ -1457,6 +1459,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
        INIT_WQ_LIST(&ctx->locked_free_list);
        INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
        INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
+       INIT_LIST_HEAD(&ctx->napi_list);
        return ctx;
 err:
        kfree(ctx->dummy_ubuf);
@@ -5419,6 +5422,70 @@ IO_NETOP_FN(send);
 IO_NETOP_FN(recv);
 #endif /* CONFIG_NET */

+#ifdef CONFIG_NET_RX_BUSY_POLL
+struct napi_entry {
+       struct list_head        list;           /* link in ctx->napi_list */
+       unsigned int            napi_id;        /* id to pass to napi_busy_loop() */
+};
+
+/* Track the NAPI id of @file's socket in ctx->napi_list for busy polling. */
+static void io_add_napi(struct file *file, struct io_ring_ctx *ctx)
+{
+       unsigned int napi_id;
+       struct socket *sock;
+       struct sock *sk;
+       struct napi_entry *ne;
+
+       if (!net_busy_loop_on())
+               return;
+
+       sock = sock_from_file(file);
+       if (!sock)
+               return;
+
+       sk = sock->sk;
+       if (!sk)
+               return;
+
+       napi_id = READ_ONCE(sk->sk_napi_id);
+       if (napi_id < MIN_NAPI_ID) /* 0..NR_CPUS are reserved, not NAPI ids */
+               return;
+
+       list_for_each_entry(ne, &ctx->napi_list, list) {
+               if (ne->napi_id == napi_id) /* already tracked */
+                       return;
+       }
+
+       ne = kmalloc(sizeof(*ne), GFP_KERNEL);
+       if (!ne)
+               return;
+       ne->napi_id = napi_id; /* was missing: entry was added uninitialized */
+       list_add_tail(&ne->list, &ctx->napi_list);
+}
+
+/* Busy-poll every NAPI id previously recorded in ctx->napi_list. */
+static void io_napi_busy_loop(struct io_ring_ctx *ctx)
+{
+       struct napi_entry *ne;
+
+       if (list_empty(&ctx->napi_list) || !net_busy_loop_on())
+               return;
+
+       list_for_each_entry(ne, &ctx->napi_list, list)
+               napi_busy_loop(ne->napi_id, NULL, NULL, false, BUSY_POLL_BUDGET);
+}
+#else
+
+/* Busy-poll support compiled out: both hooks become no-ops. */
+static inline void io_add_napi(struct file *file, struct io_ring_ctx *ctx)
+{
+       return;
+}
+
+static inline void io_napi_busy_loop(struct io_ring_ctx *ctx)
+{
+       return;
+}
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
+
 struct io_poll_table {
        struct poll_table_struct pt;
        struct io_kiocb *req;
@@ -5583,6 +5650,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
        struct io_ring_ctx *ctx = req->ctx;
        int ret;

+       io_add_napi(req->file, req->ctx);
        ret = io_poll_check_events(req);
        if (ret > 0)
                return;
@@ -5608,6 +5676,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
        struct io_ring_ctx *ctx = req->ctx;
        int ret;

+       io_add_napi(req->file, req->ctx);
        ret = io_poll_check_events(req);
        if (ret > 0)
                return;
@@ -7544,6 +7613,9 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
                        wake_up(&ctx->sqo_sq_wait);
                if (creds)
                        revert_creds(creds);
+#ifdef CONFIG_NET_RX_BUSY_POLL
+               io_napi_busy_loop(ctx);
+#endif
        }

        return ret;




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux