On Thu, May 13, 2021 at 7:27 PM Yunsheng Lin <linyunsheng@xxxxxxxxxx> wrote: > struct qdisc_size_table { > @@ -159,8 +160,33 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc) > static inline bool qdisc_run_begin(struct Qdisc *qdisc) > { > if (qdisc->flags & TCQ_F_NOLOCK) { > + if (spin_trylock(&qdisc->seqlock)) > + goto nolock_empty; > + > + /* If the MISSED flag is set, it means other thread has > + * set the MISSED flag before second spin_trylock(), so > + * we can return false here to avoid multi cpus doing > + * the set_bit() and second spin_trylock() concurrently. > + */ > + if (test_bit(__QDISC_STATE_MISSED, &qdisc->state)) > + return false; > + > + /* Set the MISSED flag before the second spin_trylock(), > + * if the second spin_trylock() return false, it means > + * other cpu holding the lock will do dequeuing for us > + * or it will see the MISSED flag set after releasing > + * lock and reschedule the net_tx_action() to do the > + * dequeuing. > + */ > + set_bit(__QDISC_STATE_MISSED, &qdisc->state); > + > + /* Retry again in case other CPU may not see the new flag > + * after it releases the lock at the end of qdisc_run_end(). > + */ > if (!spin_trylock(&qdisc->seqlock)) > return false; > + > +nolock_empty: > WRITE_ONCE(qdisc->empty, false); > } else if (qdisc_is_running(qdisc)) { > return false; > @@ -176,8 +202,15 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) > static inline void qdisc_run_end(struct Qdisc *qdisc) > { > write_seqcount_end(&qdisc->running); > - if (qdisc->flags & TCQ_F_NOLOCK) > + if (qdisc->flags & TCQ_F_NOLOCK) { > spin_unlock(&qdisc->seqlock); > + > + if (unlikely(test_bit(__QDISC_STATE_MISSED, > + &qdisc->state))) { > + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); We have test_and_clear_bit(), which is atomic; the test_bit() + clear_bit() pair is not. 
> + __netif_schedule(qdisc); > + } > + } > } > > static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) > diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c > index 44991ea..795d986 100644 > --- a/net/sched/sch_generic.c > +++ b/net/sched/sch_generic.c > @@ -640,8 +640,10 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) > { > struct pfifo_fast_priv *priv = qdisc_priv(qdisc); > struct sk_buff *skb = NULL; > + bool need_retry = true; > int band; > > +retry: > for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) { > struct skb_array *q = band2list(priv, band); > > @@ -652,6 +654,23 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) > } > if (likely(skb)) { > qdisc_update_stats_at_dequeue(qdisc, skb); > + } else if (need_retry && > + test_bit(__QDISC_STATE_MISSED, &qdisc->state)) { > + /* Delay clearing the STATE_MISSED here to reduce > + * the overhead of the second spin_trylock() in > + * qdisc_run_begin() and __netif_schedule() calling > + * in qdisc_run_end(). > + */ > + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); Ditto: test_and_clear_bit() would be atomic here, too. > + > + /* Make sure dequeuing happens after clearing > + * STATE_MISSED. > + */ > + smp_mb__after_atomic(); > + > + need_retry = false; > + > + goto retry; Two concurrent pfifo_fast_dequeue() calls could both retry at the same time if they both test __QDISC_STATE_MISSED and see it set. Is this a problem? Also, is there any reason why you want pfifo_fast to handle a generic Qdisc flag? IOW, why not handle this logic in, for example, qdisc_restart()? Thanks.