The network stack defers SKBs free, in-case free happens in IRQ or when IRQs are disabled. This happens in __dev_kfree_skb_irq() that writes SKBs that were free'ed during IRQ to the softirq completion queue (softnet_data.completion_queue). These SKBs are naturally delayed, and cleaned up during NET_TX_SOFTIRQ in function net_tx_action(). Take advantage of this a use the skb defer and flush API, as we are already in softirq context. For modern drivers this rarely happens. Although most drivers do call dev_kfree_skb_any(), which detects the situation and calls __dev_kfree_skb_irq() when needed. This due to netpoll can call from IRQ context. Signed-off-by: Alexander Duyck <alexander.h.duyck@xxxxxxxxxx> Signed-off-by: Jesper Dangaard Brouer <brouer@xxxxxxxxxx> --- include/linux/skbuff.h | 1 + net/core/dev.c | 8 +++++++- net/core/skbuff.c | 8 ++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a3dec82e0e2c..f314abff2cbd 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2311,6 +2311,7 @@ static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, void napi_consume_skb(struct sk_buff *skb, int budget); void __kfree_skb_flush(void); +void __kfree_skb_defer(struct sk_buff *skb); /** * __dev_alloc_pages - allocate page for network Rx diff --git a/net/core/dev.c b/net/core/dev.c index 204059f67154..0e88397db1fa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3588,8 +3588,14 @@ static void net_tx_action(struct softirq_action *h) trace_consume_skb(skb); else trace_kfree_skb(skb, net_tx_action); - __kfree_skb(skb); + + if (skb->fclone != SKB_FCLONE_UNAVAILABLE) + __kfree_skb(skb); + else + __kfree_skb_defer(skb); } + + __kfree_skb_flush(); } if (sd->output_queue) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2682ac46d640..2ffcb014e00b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -767,7 +767,7 @@ void __kfree_skb_flush(void) } } -static void __kfree_skb_defer(struct sk_buff *skb) +static inline void _kfree_skb_defer(struct sk_buff *skb) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); @@ -789,6 +789,10 @@ static void __kfree_skb_defer(struct sk_buff *skb) nc->skb_count = 0; } } +void __kfree_skb_defer(struct sk_buff *skb) +{ + _kfree_skb_defer(skb); +} void napi_consume_skb(struct sk_buff *skb, int budget) { @@ -814,7 +818,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget) return; } - __kfree_skb_defer(skb); + _kfree_skb_defer(skb); } EXPORT_SYMBOL(napi_consume_skb); -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>