Reduce the BH-disable period further by moving cpu_map_build_skb() outside
of, and before, invoking the network stack, and build up an skb list that
is used for netif_receive_skb_list(). This is also an I-cache optimization.

When injecting packets into the network stack, cpumap uses a special
function named netif_receive_skb_core(), and we create an equivalent
list version named netif_receive_skb_list_core().

Signed-off-by: Jesper Dangaard Brouer <brouer@xxxxxxxxxx>
---
 include/linux/netdevice.h |  1 +
 kernel/bpf/cpumap.c       | 17 ++++++++++-------
 net/core/dev.c            | 18 ++++++++++++++++++
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 166fdc0a78b4..37e78dc9f30a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3621,6 +3621,7 @@ int netif_rx_ni(struct sk_buff *skb);
 int netif_receive_skb(struct sk_buff *skb);
 int netif_receive_skb_core(struct sk_buff *skb);
 void netif_receive_skb_list(struct list_head *head);
+void netif_receive_skb_list_core(struct list_head *head);
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
 void napi_gro_flush(struct napi_struct *napi, bool flush_old);
 struct sk_buff *napi_get_frags(struct napi_struct *napi);
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 430103e182a0..cb93df200cd0 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -256,6 +256,7 @@ static int cpu_map_kthread_run(void *data)
 	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
 		unsigned int drops = 0, sched = 0;
 		void *frames[CPUMAP_BATCH];
+		struct list_head skb_list;
 		int i, n;
 
 		/* Release CPU reschedule checks */
@@ -279,23 +280,25 @@ static int cpu_map_kthread_run(void *data)
 		 */
 		n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH);
 
-		local_bh_disable();
+		INIT_LIST_HEAD(&skb_list);
+
 		for (i = 0; i < n; i++) {
 			struct xdp_frame *xdpf = frames[i];
 			struct sk_buff *skb;
-			int ret;
 
 			skb = cpu_map_build_skb(rcpu, xdpf);
 			if (!skb) {
 				xdp_return_frame(xdpf);
 				continue;
 			}
-
-			/* Inject into network stack */
-			ret = netif_receive_skb_core(skb);
-			if (ret == NET_RX_DROP)
-				drops++;
+			list_add_tail(&skb->list, &skb_list);
 		}
+
+		local_bh_disable();
+
+		/* Inject into network stack */
+		netif_receive_skb_list_core(&skb_list);
+
 		/* Feedback loop via tracepoint */
 		trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 9ca2d3abfd1a..1dee7bd895a0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5297,6 +5297,24 @@ void netif_receive_skb_list(struct list_head *head)
 }
 EXPORT_SYMBOL(netif_receive_skb_list);
 
+/**
+ * netif_receive_skb_list_core - special version of netif_receive_skb_list
+ * @head: list of skbs to process.
+ *
+ * More direct receive version of netif_receive_skb_list().  It should
+ * only be used by callers that have a need to skip RPS and Generic XDP.
+ *
+ * This function may only be called from softirq context and interrupts
+ * should be enabled.
+ */
+void netif_receive_skb_list_core(struct list_head *head)
+{
+	rcu_read_lock();
+	__netif_receive_skb_list(head);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(netif_receive_skb_list_core);
+
 DEFINE_PER_CPU(struct work_struct, flush_works);
 
 /* Network device is going away, flush any packets still pending */
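
For reviewers, a minimal caller-side sketch of the pattern the cpumap hunk
follows (illustrative only, not part of the patch): skbs are built and
queued on an on-stack list while BH is still enabled, and only the final
list injection via netif_receive_skb_list_core() runs in the BH-disabled
section. The helper name deliver_frames() is hypothetical; it assumes the
kernel/bpf/cpumap.c context for cpu_map_build_skb() and xdp_return_frame().

/* Illustrative sketch only -- mirrors the cpumap.c change above.
 * deliver_frames() is a hypothetical helper; cpu_map_build_skb() and
 * xdp_return_frame() are used as in kernel/bpf/cpumap.c.
 */
static void deliver_frames(struct bpf_cpu_map_entry *rcpu,
			   struct xdp_frame **frames, int n)
{
	struct list_head skb_list;
	int i;

	INIT_LIST_HEAD(&skb_list);

	/* Build skbs while BH is still enabled */
	for (i = 0; i < n; i++) {
		struct sk_buff *skb = cpu_map_build_skb(rcpu, frames[i]);

		if (!skb) {
			xdp_return_frame(frames[i]);
			continue;
		}
		list_add_tail(&skb->list, &skb_list);
	}

	/* Only the list injection runs with BH disabled */
	local_bh_disable();
	netif_receive_skb_list_core(&skb_list);
	local_bh_enable();
}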