3598 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
3599 				 struct net_device *dev,
3600 				 struct netdev_queue *txq)
3601 {
...
3650 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
3651 		   qdisc_run_begin(q)) {

---> The multiple *and conditions* in this if statement are not
necessarily evaluated sequentially. If qdisc_run_begin(q) is evaluated
first while the other conditions turn out to be false, qdisc_run_end()
never gets a chance to run, and the lowest bit of q->running stays 1
forever. This may lead to a softlockup:
https://bugzilla.kernel.org/show_bug.cgi?id=205427

...
3657
3658 		qdisc_bstats_update(q, skb);
...
3661 			if (unlikely(contended)) {
3662 				spin_unlock(&q->busylock);
3663 				contended = false;
3664 			}
3665 			__qdisc_run(q);
3666 		}
3667
3668 		qdisc_run_end(q);
3669 		rc = NET_XMIT_SUCCESS;
3670 	}
...

Ensure the correct execution order by spelling out those dependencies
explicitly: test the cheap bypass conditions first, and only then call
qdisc_run_begin() inside a nested if, so that every successful
qdisc_run_begin() is paired with a qdisc_run_end().

Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump")
Signed-off-by: Wen Yang <wenyang@xxxxxxxxxxxxxxxxx>
Cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
Cc: Eric Dumazet <edumazet@xxxxxxxxxx>
Cc: Cong Wang <xiyou.wangcong@xxxxxxxxx>
Cc: Jamal Hadi Salim <jhs@xxxxxxxxxxxx>
Cc: John Fastabend <john.fastabend@xxxxxxxxx>
Cc: Kevin Athey <kda@xxxxxxxxxx>
Cc: Xiaotian Pei <xiaotian@xxxxxxxxxx>
Cc: netdev@xxxxxxxxxxxxxxx
Cc: bpf@xxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
 net/core/dev.c | 63 ++++++++++++++++++++++++++++++----------------------------
 1 file changed, 33 insertions(+), 30 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 20c7a67..d2690ee 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3602,27 +3602,28 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	spinlock_t *root_lock = qdisc_lock(q);
 	struct sk_buff *to_free = NULL;
 	bool contended;
-	int rc;
+	int rc = NET_XMIT_SUCCESS;
 
 	qdisc_calculate_pkt_len(skb, q);
 
 	if (q->flags & TCQ_F_NOLOCK) {
-		if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
-		    qdisc_run_begin(q)) {
-			if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
-					      &q->state))) {
-				__qdisc_drop(skb, &to_free);
-				rc = NET_XMIT_DROP;
-				goto end_run;
-			}
-			qdisc_bstats_cpu_update(q, skb);
+		if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty) {
+			if (qdisc_run_begin(q)) {
+				if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
+						      &q->state))) {
+					__qdisc_drop(skb, &to_free);
+					rc = NET_XMIT_DROP;
+					goto end_run;
+				}
+				qdisc_bstats_cpu_update(q, skb);
 
-			rc = NET_XMIT_SUCCESS;
-			if (sch_direct_xmit(skb, q, dev, txq, NULL, true))
-				__qdisc_run(q);
+				if (sch_direct_xmit(skb, q, dev, txq, NULL,
+						    true))
+					__qdisc_run(q);
 end_run:
-			qdisc_run_end(q);
+				qdisc_run_end(q);
+			}
 		} else {
 			rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
 			qdisc_run(q);
@@ -3647,26 +3648,28 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
 		__qdisc_drop(skb, &to_free);
 		rc = NET_XMIT_DROP;
-	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
-		   qdisc_run_begin(q)) {
-		/*
-		 * This is a work-conserving queue; there are no old skbs
-		 * waiting to be sent out; and the qdisc is not running -
-		 * xmit the skb directly.
-		 */
+	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q)) {
+		if (qdisc_run_begin(q)) {
+			/* This is a work-conserving queue;
+			 * there are no old skbs waiting to be sent out;
+			 * and the qdisc is not running -
+			 * xmit the skb directly.
+			 */
 
-		qdisc_bstats_update(q, skb);
+			qdisc_bstats_update(q, skb);
 
-		if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
-			if (unlikely(contended)) {
-				spin_unlock(&q->busylock);
-				contended = false;
+			if (sch_direct_xmit(skb, q, dev, txq, root_lock,
+					    true)) {
+				if (unlikely(contended)) {
+					spin_unlock(&q->busylock);
+					contended = false;
+				}
+				__qdisc_run(q);
 			}
-			__qdisc_run(q);
-		}
-		qdisc_run_end(q);
-		rc = NET_XMIT_SUCCESS;
+			qdisc_run_end(q);
+			rc = NET_XMIT_SUCCESS;
+		}
 	} else {
 		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
 		if (qdisc_run_begin(q)) {
-- 
1.8.3.1
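
For reference, below is a minimal userspace sketch of the pairing
invariant this patch enforces. The fake_run_begin()/fake_run_end()
helpers are hypothetical stand-ins, not the kernel implementation
(the real qdisc_run_begin()/qdisc_run_end() operate on the seqcount
in q->running); the sketch only illustrates why a qdisc_run_begin()
that returns true must always be followed by qdisc_run_end():

/* Userspace sketch only -- fake_run_begin()/fake_run_end() are
 * hypothetical stand-ins for qdisc_run_begin()/qdisc_run_end(),
 * modelling the lowest bit of q->running as a plain bool.
 */
#include <stdbool.h>
#include <stdio.h>

static bool running;		/* models the lowest bit of q->running */

static bool fake_run_begin(void)
{
	if (running)
		return false;	/* another CPU owns the qdisc */
	running = true;		/* bit set: we now own it */
	return true;
}

static void fake_run_end(void)
{
	running = false;	/* bit cleared: ownership released */
}

int main(void)
{
	/* Correct pairing: each successful begin is followed by end. */
	if (fake_run_begin()) {
		/* ... xmit ... */
		fake_run_end();
	}

	/* The hazard described above: begin() succeeds but end() is
	 * skipped, so the bit stays set and every later begin() fails,
	 * i.e. the qdisc looks permanently busy.
	 */
	(void)fake_run_begin();	/* fake_run_end() intentionally skipped */

	printf("next begin: %s\n", fake_run_begin() ? "ok" : "stuck");
	return 0;
}

With the nested-if structure in the diff above, qdisc_run_begin() can
only be reached once the other bypass conditions already hold, so the
matching qdisc_run_end() in the same block is guaranteed to run.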