On Mon, Feb 28, 2022 at 11:38 AM Harold Huang <baymaxhuang@xxxxxxxxx> wrote: > > In tun, NAPI is supported and we can also use NAPI in the path of > batched XDP buffs to accelerate packet processing. What is more, after > we use NAPI, GRO is also supported. The iperf shows that the throughput of > single stream could be improved from 4.5Gbps to 9.2Gbps. Additionally, 9.2 > Gbps nearly reaches the line speed of the phy nic and there is still about > 15% idle cpu core remaining on the vhost thread. > > Test topology: > [iperf server]<--->tap<--->dpdk testpmd<--->phy nic<--->[iperf client] > > Iperf stream: > iperf3 -c 10.0.0.2 -i 1 -t 10 > > Before: > ... > [ 5] 5.00-6.00 sec 558 MBytes 4.68 Gbits/sec 0 1.50 MBytes > [ 5] 6.00-7.00 sec 556 MBytes 4.67 Gbits/sec 1 1.35 MBytes > [ 5] 7.00-8.00 sec 556 MBytes 4.67 Gbits/sec 2 1.18 MBytes > [ 5] 8.00-9.00 sec 559 MBytes 4.69 Gbits/sec 0 1.48 MBytes > [ 5] 9.00-10.00 sec 556 MBytes 4.67 Gbits/sec 1 1.33 MBytes > - - - - - - - - - - - - - - - - - - - - - - - - - > [ ID] Interval Transfer Bitrate Retr > [ 5] 0.00-10.00 sec 5.39 GBytes 4.63 Gbits/sec 72 sender > [ 5] 0.00-10.04 sec 5.39 GBytes 4.61 Gbits/sec receiver > > After: > ... 
> [ 5] 5.00-6.00 sec 1.07 GBytes 9.19 Gbits/sec 0 1.55 MBytes > [ 5] 6.00-7.00 sec 1.08 GBytes 9.30 Gbits/sec 0 1.63 MBytes > [ 5] 7.00-8.00 sec 1.08 GBytes 9.25 Gbits/sec 0 1.72 MBytes > [ 5] 8.00-9.00 sec 1.08 GBytes 9.25 Gbits/sec 77 1.31 MBytes > [ 5] 9.00-10.00 sec 1.08 GBytes 9.24 Gbits/sec 0 1.48 MBytes > - - - - - - - - - - - - - - - - - - - - - - - - - > [ ID] Interval Transfer Bitrate Retr > [ 5] 0.00-10.00 sec 10.8 GBytes 9.28 Gbits/sec 166 sender > [ 5] 0.00-10.04 sec 10.8 GBytes 9.24 Gbits/sec receiver > > Reported-at: https://lore.kernel.org/all/CACGkMEvTLG0Ayg+TtbN4q4pPW-ycgCCs3sC3-TF8cuRTf7Pp1A@xxxxxxxxxxxxxx > Signed-off-by: Harold Huang <baymaxhuang@xxxxxxxxx> Acked-by: Jason Wang <jasowang@xxxxxxxxxx> > --- > v2 -> v3 > - return the queued NAPI packet from tun_xdp_one > > drivers/net/tun.c | 43 ++++++++++++++++++++++++++++++------------- > 1 file changed, 30 insertions(+), 13 deletions(-) > > diff --git a/drivers/net/tun.c b/drivers/net/tun.c > index fed85447701a..969ea69fd29d 100644 > --- a/drivers/net/tun.c > +++ b/drivers/net/tun.c > @@ -2388,9 +2388,10 @@ static int tun_xdp_one(struct tun_struct *tun, > struct virtio_net_hdr *gso = &hdr->gso; > struct bpf_prog *xdp_prog; > struct sk_buff *skb = NULL; > + struct sk_buff_head *queue; > u32 rxhash = 0, act; > int buflen = hdr->buflen; > - int err = 0; > + int ret = 0; > bool skb_xdp = false; > struct page *page; > > @@ -2405,13 +2406,13 @@ static int tun_xdp_one(struct tun_struct *tun, > xdp_set_data_meta_invalid(xdp); > > act = bpf_prog_run_xdp(xdp_prog, xdp); > - err = tun_xdp_act(tun, xdp_prog, xdp, act); > - if (err < 0) { > + ret = tun_xdp_act(tun, xdp_prog, xdp, act); > + if (ret < 0) { > put_page(virt_to_head_page(xdp->data)); > - return err; > + return ret; > } > > - switch (err) { > + switch (ret) { > case XDP_REDIRECT: > *flush = true; > fallthrough; > @@ -2435,7 +2436,7 @@ static int tun_xdp_one(struct tun_struct *tun, > build: > skb = build_skb(xdp->data_hard_start, buflen); > if 
(!skb) { > - err = -ENOMEM; > + ret = -ENOMEM; > goto out; > } > > @@ -2445,7 +2446,7 @@ static int tun_xdp_one(struct tun_struct *tun, > if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) { > atomic_long_inc(&tun->rx_frame_errors); > kfree_skb(skb); > - err = -EINVAL; > + ret = -EINVAL; > goto out; > } > > @@ -2455,16 +2456,27 @@ static int tun_xdp_one(struct tun_struct *tun, > skb_record_rx_queue(skb, tfile->queue_index); > > if (skb_xdp) { > - err = do_xdp_generic(xdp_prog, skb); > - if (err != XDP_PASS) > + ret = do_xdp_generic(xdp_prog, skb); > + if (ret != XDP_PASS) { > + ret = 0; > goto out; > + } > } > > if (!rcu_dereference(tun->steering_prog) && tun->numqueues > 1 && > !tfile->detached) > rxhash = __skb_get_hash_symmetric(skb); > > - netif_receive_skb(skb); > + if (tfile->napi_enabled) { > + queue = &tfile->sk.sk_write_queue; > + spin_lock(&queue->lock); > + __skb_queue_tail(queue, skb); > + spin_unlock(&queue->lock); > + ret = 1; > + } else { > + netif_receive_skb(skb); > + ret = 0; > + } > > /* No need to disable preemption here since this function is > * always called with bh disabled > @@ -2475,7 +2487,7 @@ static int tun_xdp_one(struct tun_struct *tun, > tun_flow_update(tun, rxhash, tfile); > > out: > - return err; > + return ret; > } > > static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) > @@ -2492,7 +2504,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) > if (ctl && (ctl->type == TUN_MSG_PTR)) { > struct tun_page tpage; > int n = ctl->num; > - int flush = 0; > + int flush = 0, queued = 0; > > memset(&tpage, 0, sizeof(tpage)); > > @@ -2501,12 +2513,17 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) > > for (i = 0; i < n; i++) { > xdp = &((struct xdp_buff *)ctl->ptr)[i]; > - tun_xdp_one(tun, tfile, xdp, &flush, &tpage); > + ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage); > + if (ret > 0) > + queued += ret; > } > > if (flush) > 
xdp_do_flush(); > > + if (tfile->napi_enabled && queued > 0) > + napi_schedule(&tfile->napi); > + > rcu_read_unlock(); > local_bh_enable(); > > -- > 2.27.0 >