Felix Fietkau <nbd@xxxxxxxx> writes: > Excessive broadcast traffic with little competing unicast traffic can easily > flood hardware queues, leading to throughput issues. Additionally, filling > the hardware queues with too many packets breaks FQ for broadcast data. > Fix this by enabling AQL for broadcast packets. > > Signed-off-by: Felix Fietkau <nbd@xxxxxxxx> > --- > include/net/cfg80211.h | 1 + > include/net/mac80211.h | 2 +- > net/mac80211/debugfs.c | 13 +++++++-- > net/mac80211/ieee80211_i.h | 2 ++ > net/mac80211/main.c | 1 + > net/mac80211/sta_info.c | 17 +++++++++++- > net/mac80211/sta_info.h | 3 ++- > net/mac80211/status.c | 5 ++-- > net/mac80211/tx.c | 55 ++++++++++++++++++++------------------ > 9 files changed, 66 insertions(+), 33 deletions(-) > > diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h > index 192d72c8b465..c2a9af1e3c5e 100644 > --- a/include/net/cfg80211.h > +++ b/include/net/cfg80211.h > @@ -3420,6 +3420,7 @@ enum wiphy_params_flags { > /* The per TXQ device queue limit in airtime */ > #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L 5000 > #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H 12000 > +#define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_BC 50000 > > /* The per interface airtime threshold to switch to lower queue limit */ > #define IEEE80211_AQL_THRESHOLD 24000 > diff --git a/include/net/mac80211.h b/include/net/mac80211.h > index 0a04eaf5343c..5bdc6e05be1b 100644 > --- a/include/net/mac80211.h > +++ b/include/net/mac80211.h > @@ -1221,8 +1221,8 @@ struct ieee80211_tx_info { > status_data_idr:1, > status_data:13, > hw_queue:4, > + tx_time_mc:1, > tx_time_est:10; > - /* 1 free bit */ > > union { > struct { > diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c > index 02b5476a4376..a5aa5e02b2e1 100644 > --- a/net/mac80211/debugfs.c > +++ b/net/mac80211/debugfs.c > @@ -215,11 +215,13 @@ static ssize_t aql_pending_read(struct file *file, > "VI %u us\n" > "BE %u us\n" > "BK %u us\n" > + "BC/MC %u us\n" > "total %u us\n", > atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VO]), > atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VI]), > atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BE]), > atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BK]), > + atomic_read(&local->aql_bc_pending_airtime), > atomic_read(&local->aql_total_pending_airtime)); > return simple_read_from_buffer(user_buf, count, ppos, > buf, len); > @@ -245,7 +247,8 @@ static ssize_t aql_txq_limit_read(struct file *file, > "VO %u %u\n" > "VI %u %u\n" > "BE %u %u\n" > - "BK %u %u\n", > + "BK %u %u\n" > + "BC/MC %u\n", > local->aql_txq_limit_low[IEEE80211_AC_VO], > local->aql_txq_limit_high[IEEE80211_AC_VO], > local->aql_txq_limit_low[IEEE80211_AC_VI], > @@ -253,7 +256,8 @@ static ssize_t aql_txq_limit_read(struct file *file, > local->aql_txq_limit_low[IEEE80211_AC_BE], > local->aql_txq_limit_high[IEEE80211_AC_BE], > local->aql_txq_limit_low[IEEE80211_AC_BK], > - local->aql_txq_limit_high[IEEE80211_AC_BK]); > + local->aql_txq_limit_high[IEEE80211_AC_BK], > + local->aql_txq_limit_bc); > return simple_read_from_buffer(user_buf, count, ppos, > buf, len); > } > @@ -279,6 +283,11 @@ static ssize_t aql_txq_limit_write(struct file *file, > else > buf[count] = '\0'; > > + if (sscanf(buf, "mcast %u", &q_limit_low) == 1) { > + local->aql_txq_limit_bc = q_limit_low; > + return count; > + } > + > if (sscanf(buf, "%u %u %u", &ac, &q_limit_low, &q_limit_high) != 3) > return -EINVAL; > > diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h > index a3485e4c6132..304cce0b771d 100644 > --- a/net/mac80211/ieee80211_i.h > +++ b/net/mac80211/ieee80211_i.h > @@ -1349,10 +1349,12 @@ struct ieee80211_local { > spinlock_t handle_wake_tx_queue_lock; > > u16 airtime_flags; > + u32 aql_txq_limit_bc; > u32 aql_txq_limit_low[IEEE80211_NUM_ACS]; > u32 aql_txq_limit_high[IEEE80211_NUM_ACS]; > u32 aql_threshold; > atomic_t aql_total_pending_airtime; > + atomic_t aql_bc_pending_airtime; > atomic_t aql_ac_pending_airtime[IEEE80211_NUM_ACS]; > > const struct ieee80211_ops *ops; > diff --git a/net/mac80211/main.c b/net/mac80211/main.c > index a3104b6ea6f0..6abf85a58133 100644 > --- a/net/mac80211/main.c > +++ b/net/mac80211/main.c > @@ -952,6 +952,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, > spin_lock_init(&local->rx_path_lock); > spin_lock_init(&local->queue_stop_reason_lock); > > + local->aql_txq_limit_bc = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_BC; > for (i = 0; i < IEEE80211_NUM_ACS; i++) { > INIT_LIST_HEAD(&local->active_txqs[i]); > spin_lock_init(&local->active_txq_lock[i]); > diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c > index aa22f09e6d14..ed73f5b7af81 100644 > --- a/net/mac80211/sta_info.c > +++ b/net/mac80211/sta_info.c > @@ -2352,13 +2352,28 @@ EXPORT_SYMBOL(ieee80211_sta_recalc_aggregates); > > void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, > struct sta_info *sta, u8 ac, > - u16 tx_airtime, bool tx_completed) > + u16 tx_airtime, bool tx_completed, > + bool mcast) > { > int tx_pending; > > if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) > return; > > + if (mcast) { > + if (!tx_completed) { > + atomic_add(tx_airtime, &local->aql_bc_pending_airtime); > + return; > + } > + > + tx_pending = atomic_sub_return(tx_airtime, > + &local->aql_bc_pending_airtime); > + if (tx_pending < 0) > + atomic_cmpxchg(&local->aql_bc_pending_airtime, > + tx_pending, 0); > + return; > + } > + > if (!tx_completed) { > if (sta) > atomic_add(tx_airtime, > diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h > index 9195d5a2de0a..ad449f8dfa76 100644 > --- a/net/mac80211/sta_info.h > +++ b/net/mac80211/sta_info.h > @@ -147,7 +147,8 @@ struct airtime_info { > > void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, > struct sta_info *sta, u8 ac, > - u16 tx_airtime, bool tx_completed); > + u16 tx_airtime, bool tx_completed, > + bool mcast); > > struct sta_info; > > diff --git a/net/mac80211/status.c b/net/mac80211/status.c > index dd8f857a1fbc..2ef98964a485 100644 > --- a/net/mac80211/status.c > +++ b/net/mac80211/status.c > @@ -734,7 +734,7 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, > ieee80211_sta_update_pending_airtime(local, sta, > skb_get_queue_mapping(skb), > tx_time_est, > - true); > + true, info->tx_time_mc); > rcu_read_unlock(); > } > > @@ -1158,10 +1158,11 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, > /* Do this here to avoid the expensive lookup of the sta > * in ieee80211_report_used_skb(). > */ > + bool mcast = IEEE80211_SKB_CB(skb)->tx_time_mc; > ieee80211_sta_update_pending_airtime(local, sta, > skb_get_queue_mapping(skb), > tx_time_est, > - true); > + true, mcast); > ieee80211_info_set_tx_time_est(IEEE80211_SKB_CB(skb), 0); > } > > diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c > index 72a9ba8bc5fd..dfa532d44280 100644 > --- a/net/mac80211/tx.c > +++ b/net/mac80211/tx.c > @@ -2554,7 +2554,7 @@ static u16 ieee80211_store_ack_skb(struct ieee80211_local *local, > > spin_lock_irqsave(&local->ack_status_lock, flags); > id = idr_alloc(&local->ack_status_frames, ack_skb, > - 1, 0x2000, GFP_ATOMIC); > + 1, 0x1000, GFP_ATOMIC); > spin_unlock_irqrestore(&local->ack_status_lock, flags); > > if (id >= 0) { > @@ -3981,20 +3981,20 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, > encap_out: > info->control.vif = vif; > > - if (tx.sta && > - wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { > - bool ampdu = txq->ac != IEEE80211_AC_VO; > + if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { > + bool ampdu = txq->sta && txq->ac != IEEE80211_AC_VO; > u32 airtime; > > airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta, > skb->len, ampdu); > - if (airtime) { > - airtime = ieee80211_info_set_tx_time_est(info, airtime); > - ieee80211_sta_update_pending_airtime(local, tx.sta, > - txq->ac, > - airtime, > - false); > - } > + if (!airtime) > + return skb; > + > + airtime = ieee80211_info_set_tx_time_est(info, airtime); > + info->tx_time_mc = !tx.sta; > + ieee80211_sta_update_pending_airtime(local, tx.sta, txq->ac, > + airtime, false, > + info->tx_time_mc); > } > > return skb; > @@ -4046,6 +4046,7 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) > struct ieee80211_txq *ret = NULL; > struct txq_info *txqi = NULL, *head = NULL; > bool found_eligible_txq = false; > + bool aql_check; > > spin_lock_bh(&local->active_txq_lock[ac]); > > @@ -4069,26 +4070,27 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) > if (!head) > head = txqi; > > + aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq); > + if (aql_check) > + found_eligible_txq = true; > + > if (txqi->txq.sta) { > struct sta_info *sta = container_of(txqi->txq.sta, > struct sta_info, sta); > - bool aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq); > - s32 deficit = ieee80211_sta_deficit(sta, txqi->txq.ac); > > - if (aql_check) > - found_eligible_txq = true; > - > - if (deficit < 0) > + if (ieee80211_sta_deficit(sta, txqi->txq.ac) < 0) { > sta->airtime[txqi->txq.ac].deficit += > sta->airtime_weight; > - > - if (deficit < 0 || !aql_check) { > - list_move_tail(&txqi->schedule_order, > - &local->active_txqs[txqi->txq.ac]); > - goto begin; > + aql_check = false; > } > } > > + if (!aql_check) { > + list_move_tail(&txqi->schedule_order, > + &local->active_txqs[txqi->txq.ac]); > + goto begin; > + } > + > if (txqi->schedule_round == local->schedule_round[ac]) > goto out; > > @@ -4153,7 +4155,8 @@ bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw, > return true; > > if (!txq->sta) > - return true; > + return atomic_read(&local->aql_bc_pending_airtime) < > + local->aql_txq_limit_bc; > > if (unlikely(txq->tid == IEEE80211_NUM_TIDS)) > return true; > @@ -4202,15 +4205,15 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, > > spin_lock_bh(&local->active_txq_lock[ac]); > > - if (!txqi->txq.sta) > - goto out; > - > if (list_empty(&txqi->schedule_order)) > goto out; > > if (!ieee80211_txq_schedule_airtime_check(local, ac)) > goto out; > > + if (!txqi->txq.sta) > + goto out; > + I'm not sure this last change makes any difference? Before, if !sta, we'll always return true. After, if ieee80211_txq_schedule_airtime_check() returns false, we'll return true, and if ieee80211_txq_schedule_airtime_check() returns true, we'll go to the same sta check, so if !sta we'll still return true. So changing the order of the checks is a bit pointless? In fact, it seems to me that the logic here around ieee80211_txq_schedule_airtime_check() is reversed; shouldn't we be returning *false* (from the may_schedule) if ieee80211_txq_schedule_airtime_check() returns false? -Toke