Now that cpumap uses GRO, which returns unused skb heads to the NAPI
cache, use napi_skb_cache_get_bulk() to try to reuse cached entries and
lower MM layer pressure. When all 8 skbs from one cpumap batch go into
a single GRO skb (so the remaining 7 heads go back into the cache),
only 1 skb has to be allocated per cycle instead of 8. If some other
work runs in between the cycles, even all 8 may be taken from the cache
each cycle. This makes the BH-off period per batch slightly longer --
previously, skb allocation happened in process context.

Signed-off-by: Alexander Lobakin <alexandr.lobakin@xxxxxxxxx>
---
 kernel/bpf/cpumap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 145f49de0931..1bb3ae570e6c 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -365,7 +365,6 @@ static int cpu_map_kthread_run(void *data)
 	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
 		struct xdp_cpumap_stats stats = {}; /* zero stats */
 		unsigned int kmem_alloc_drops = 0, sched = 0;
-		gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
 		int i, n, m, nframes, xdp_n;
 		void *frames[CPUMAP_BATCH];
 		void *skbs[CPUMAP_BATCH];
@@ -416,8 +415,10 @@ static int cpu_map_kthread_run(void *data)
 		/* Support running another XDP prog on this CPU */
 		nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats,
					       &list);
+		local_bh_disable();
+
 		if (nframes) {
-			m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
+			m = napi_skb_cache_get_bulk(skbs, nframes);
 			if (unlikely(m == 0)) {
 				for (i = 0; i < nframes; i++)
 					skbs[i] = NULL; /* effect: xdp_return_frame */
@@ -425,7 +426,6 @@ static int cpu_map_kthread_run(void *data)
 			}
 		}
 
-		local_bh_disable();
 		for (i = 0; i < nframes; i++) {
 			struct xdp_frame *xdpf = frames[i];
 			struct sk_buff *skb = skbs[i];
--
2.36.1
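
For readers unfamiliar with the bulk-decache pattern the kthread now
relies on, the userspace sketch below mimics the idea: a per-thread
pointer cache is drained first and only the shortfall is freshly
allocated, so a batch whose skb heads were recycled by GRO rarely hits
the allocator. All names here (cache_put(), cache_get_bulk(), the
sizes) are illustrative stand-ins, not the kernel's skbuff internals;
only the "m = napi_skb_cache_get_bulk(skbs, nframes)" contract it
mirrors comes from the patch above.

/*
 * Userspace sketch of the bulk-decache pattern. Illustrative only:
 * not kernel code, not the real napi_skb_cache_get_bulk().
 */
#include <stdio.h>
#include <stdlib.h>

#define CACHE_SIZE	64
#define BATCH		8

static __thread void *obj_cache[CACHE_SIZE];
static __thread unsigned int obj_cache_count;

/* Return an object to the cache instead of freeing it, similar to how
 * GRO hands unused skb heads back to the NAPI cache in this series.
 */
static void cache_put(void *obj)
{
	if (obj_cache_count < CACHE_SIZE)
		obj_cache[obj_cache_count++] = obj;
	else
		free(obj);
}

/* Fill @objs with up to @n objects: cached entries first, fresh
 * allocations only for the remainder. Returns how many were obtained,
 * mirroring the m = napi_skb_cache_get_bulk(skbs, nframes) contract.
 */
static unsigned int cache_get_bulk(void **objs, unsigned int n)
{
	unsigned int got = 0;

	while (got < n && obj_cache_count)
		objs[got++] = obj_cache[--obj_cache_count];

	while (got < n) {
		void *obj = calloc(1, 256);

		if (!obj)
			break;
		objs[got++] = obj;
	}

	return got;
}

int main(void)
{
	void *objs[BATCH];
	unsigned int m, i;

	/* Pretend GRO coalesced 7 of 8 skbs: their heads got cached. */
	for (i = 0; i < 7; i++)
		cache_put(calloc(1, 256));

	/* Next batch: only 1 of the 8 requests reaches the allocator. */
	m = cache_get_bulk(objs, BATCH);
	printf("got %u objects, 7 of them from the cache\n", m);

	for (i = 0; i < m; i++)
		free(objs[i]);

	return 0;
}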