On 10/04/2019 12:43, Jesper Dangaard Brouer wrote:
> Reduce BH-disable period further by moving cpu_map_build_skb()
> outside/before invoking the network stack. And build up a
> skb_list that is used for netif_receive_skb_list. This is also
> an I-cache optimization.
>
> When injecting packets into the network stack, cpumap uses a special
> function named netif_receive_skb_core(), and we create a equivalent list
> version named netif_receive_skb_list_core().
>
> Signed-off-by: Jesper Dangaard Brouer <brouer@xxxxxxxxxx>
> ---
>  include/linux/netdevice.h |    1 +
>  kernel/bpf/cpumap.c       |   17 ++++++++++-------
>  net/core/dev.c            |   18 ++++++++++++++++++
>  3 files changed, 29 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 166fdc0a78b4..37e78dc9f30a 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -3621,6 +3621,7 @@ int netif_rx_ni(struct sk_buff *skb);
>  int netif_receive_skb(struct sk_buff *skb);
>  int netif_receive_skb_core(struct sk_buff *skb);
>  void netif_receive_skb_list(struct list_head *head);
> +void netif_receive_skb_list_core(struct list_head *head);
>  gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
>  void napi_gro_flush(struct napi_struct *napi, bool flush_old);
>  struct sk_buff *napi_get_frags(struct napi_struct *napi);
> diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
> index 430103e182a0..cb93df200cd0 100644
> --- a/kernel/bpf/cpumap.c
> +++ b/kernel/bpf/cpumap.c
> @@ -256,6 +256,7 @@ static int cpu_map_kthread_run(void *data)
>  	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
>  		unsigned int drops = 0, sched = 0;
>  		void *frames[CPUMAP_BATCH];
> +		struct list_head skb_list;
>  		int i, n;
>
>  		/* Release CPU reschedule checks */
> @@ -279,23 +280,25 @@
>  		 */
>  		n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH);
>
> -		local_bh_disable();
> +		INIT_LIST_HEAD(&skb_list);
> +
>  		for (i = 0; i < n; i++) {
>  			struct xdp_frame *xdpf = frames[i];
>  			struct sk_buff *skb;
> -			int ret;
>
>  			skb = cpu_map_build_skb(rcpu, xdpf);
>  			if (!skb) {
>  				xdp_return_frame(xdpf);
>  				continue;
>  			}
> -
> -			/* Inject into network stack */
> -			ret = netif_receive_skb_core(skb);
> -			if (ret == NET_RX_DROP)
> -				drops++;

You're losing this `drops` incrementation and not doing anything to
replace it...

> +			list_add_tail(&skb->list, &skb_list);
>  		}
> +
> +		local_bh_disable();
> +
> +		/* Inject into network stack */
> +		netif_receive_skb_list_core(&skb_list);
> +
>  		/* Feedback loop via tracepoint */
>  		trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched);

... yet still feeding it to the tracepoint here.  I ran into something
similar with my list-GRO patches (callers wanted to know how many
packets from the list were received vs. dropped); check those to see
how I wired that counting all the way through the listified stack.
There's a rough sketch of the idea below the quoted patch.

Apart from that, I like this!
-Ed

>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 9ca2d3abfd1a..1dee7bd895a0 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -5297,6 +5297,24 @@ void netif_receive_skb_list(struct list_head *head)
>  }
>  EXPORT_SYMBOL(netif_receive_skb_list);
>
> +/**
> + * netif_receive_skb_list_core - special version of netif_receive_skb_list
> + * @head: list of skbs to process.
> + *
> + * More direct receive version of netif_receive_skb_list().  It should
> + * only be used by callers that have a need to skip RPS and Generic XDP.
> + *
> + * This function may only be called from softirq context and interrupts
> + * should be enabled.
> + */
> +void netif_receive_skb_list_core(struct list_head *head)
> +{
> +	rcu_read_lock();
> +	__netif_receive_skb_list(head);
> +	rcu_read_unlock();
> +}
> +EXPORT_SYMBOL(netif_receive_skb_list_core);
> +
>  DEFINE_PER_CPU(struct work_struct, flush_works);
>
>  /* Network device is going away, flush any packets still pending */
>
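
Very rough, untested sketch of the kind of counting I mean (names and
plumbing invented for illustration here, not necessarily how the
list-GRO series ended up doing it): have the listified entry point
report how many skbs it actually delivered, and let cpumap derive drops
from the number it queued, which recovers the old per-skb NET_RX_DROP
accounting for the tracepoint.

/* net/core/dev.c -- sketch: assumes __netif_receive_skb_list() is
 * taught to return the number of skbs it passed up the stack
 * (today it returns void).
 */
int netif_receive_skb_list_core(struct list_head *head)
{
	int delivered;

	rcu_read_lock();
	delivered = __netif_receive_skb_list(head);
	rcu_read_unlock();

	return delivered;
}

/* kernel/bpf/cpumap.c, in cpu_map_kthread_run() -- sketch: 'queued'
 * and 'delivered' would be declared next to drops/sched, and 'queued'
 * incremented alongside the list_add_tail() in the build loop.
 */
local_bh_disable();

delivered = netif_receive_skb_list_core(&skb_list);
drops += queued - delivered;

/* Feedback loop via tracepoint */
trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched);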