On 2018-10-12 03:16, Toke Høiland-Jørgensen wrote:
- Just loop with the smaller quantum until one of the stations go into
the positive (what we do now).
- Go through all active stations, find the one that is closest being in
the positive, and add that amount to the quantum. I.e., something
like (assuming no station has positive deficit; if one does, you
don't
want to add anything anyway):
to_add = -(max(stn.deficit) for stn in active stations)
for stn in active stations:
stn.deficit += to_add + stn.weight
Toke,
Sorry for the delayed response. I did lot of experiments. Below are my
observations.
Sorry for lengthy reply.
In current model, next_txq() is main routine that serves DRR and
fairness is
enforced by serving only only first txq. Here the first node could be
either
newly initiated traffic or returned node by return_txq(). This works
perfectly
as long as the driver is running any RR algo.
Whereas in ath10k, firmware runs its own RR in pull mode and builds txq
list
based on driver's hostq table. In this case it can not be simply assumed
that
firmware always gives fetch request for first node of mac80211's txq
list.
i.e both RR algo could be out of sync.
Two major differences b/w ath9k and ath10k
1) Serving txqs
The ath9k always serves txq by next_txq and so that the txqs_list is
rotated to serve
other txq. But in ath10k (pull-mode), first node becomes sticky one
until it is
picked by firmware via fetch indication and it becomes negative deficit.
The sequence is followed in wake_tx_queue
- dequeue first node
- push is not allowed
- enqueue same txq back to head
2) Refill rate of deficit.
The ath9k refills deficit mostly in hot path by next_txq() in tx & isr
routine.
In case of ath10k, due to above problem, deficits wont be filled in hot
path.
Either it should be filled in fetch_ind itself or by scheduling another
task.
Both the approaches are slower compared to hot path when the driver is
bursting
aggregation. On an idle condition a single fetch indication can dequeue
~190 msdus
from each tid of give stn list. This drains the deficit quickly and
becomes too low.
To speed up this, either refill the station by multiples of stn airtime
weight or
allows the txqs_list rotation. So that next_txq will be used for
refilling deficit.
Attaching return_txq() change that helps to get rid of quantum multiple.
-Rajkumar
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 625a4ab37ea0..269ae8311056 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -2352,7 +2352,7 @@ static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct sk_buff *skb)
num_msdus++;
num_bytes += ret;
}
- ieee80211_return_txq(hw, txq);
+ ieee80211_return_txq(hw, txq, true);
ieee80211_txq_schedule_end(hw, txq->ac);
record->num_msdus = cpu_to_le16(num_msdus);
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index cf64d9e02a24..d39bc841ea04 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -4206,7 +4206,7 @@ static int ath10k_mac_schedule_txq(struct ieee80211_hw *hw, u32 ac)
if (ret < 0)
break;
}
- ieee80211_return_txq(hw, txq);
+ ieee80211_return_txq(hw, txq, true);
ath10k_htt_tx_txq_update(hw, txq);
if (ret == -EBUSY)
break;
@@ -4475,18 +4475,21 @@ static void ath10k_mac_op_wake_tx_queue(struct ieee80211_hw *hw,
{
u8 ac = txq->ac;
int ret = 0;
+ bool pushed = false;
+ ath10k_htt_tx_txq_update(hw, txq);
ieee80211_txq_schedule_start(hw, ac);
txq = ieee80211_next_txq(hw, ac);
if (!txq)
goto out;
while (ath10k_mac_tx_can_push(hw, txq)) {
+ pushed = true;
ret = ath10k_mac_tx_push_txq(hw, txq);
if (ret < 0)
break;
}
- ieee80211_return_txq(hw, txq);
+ ieee80211_return_txq(hw, txq, pushed);
ath10k_htt_tx_txq_update(hw, txq);
out:
ieee80211_txq_schedule_end(hw, ac);
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index 6aab06909e76..40ff0bdbf7c9 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -117,7 +117,7 @@ void ath_tx_queue_tid(struct ath_softc *sc, struct ath_atx_tid *tid)
struct ieee80211_txq *queue = container_of(
(void *)tid, struct ieee80211_txq, drv_priv);
- ieee80211_return_txq(sc->hw, queue);
+ ieee80211_return_txq(sc->hw, queue, false);
}
void ath9k_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_txq *queue)
@@ -1913,7 +1913,7 @@ void ath_txq_schedule(struct ath_softc *sc, struct ath_txq *txq)
ret = ath_tx_sched_aggr(sc, txq, tid);
ath_dbg(common, QUEUE, "ath_tx_sched_aggr returned %d\n", ret);
- ieee80211_return_txq(hw, queue);
+ ieee80211_return_txq(hw, queue, false);
}
out:
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 9cadfa408f50..995e19e29d9e 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6102,7 +6102,7 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac);
* Should only be called between calls to ieee80211_txq_schedule_start()
* and ieee80211_txq_schedule_end().
*/
-void ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
+void ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txqi, bool to_tail);
/**
* ieee80211_txq_schedule_start - acquire locks for safe scheduling of an AC
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 39dc40ac5abe..530270f8cc52 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1175,7 +1175,7 @@ static inline void schedule_and_wake_txq(struct ieee80211_local *local,
struct txq_info *txqi)
{
spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]);
- ieee80211_return_txq(&local->hw, &txqi->txq);
+ ieee80211_return_txq(&local->hw, &txqi->txq, false);
spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]);
drv_wake_tx_queue(local, txqi);
}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d8682930a469..acf0505bdd7f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3623,7 +3623,8 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
EXPORT_SYMBOL(ieee80211_next_txq);
void ieee80211_return_txq(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq)
+ struct ieee80211_txq *txq,
+ bool to_tail)
{
struct ieee80211_local *local = hw_to_local(hw);
struct txq_info *txqi = to_txq_info(txq);
@@ -3641,7 +3642,7 @@ void ieee80211_return_txq(struct ieee80211_hw *hw,
*/
if (wiphy_ext_feature_isset(local->hw.wiphy,
NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)
- && txqi->txq.sta)
+ && txqi->txq.sta && !to_tail)
list_add(&txqi->schedule_order,
&local->active_txqs[txq->ac]);
else
@@ -3652,7 +3653,6 @@ void ieee80211_return_txq(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL(ieee80211_return_txq);
-#define IEEE80211_TXQ_MAY_TX_QUANTUM 20
bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
struct ieee80211_txq *txq)
{
@@ -3670,13 +3670,8 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
if (sta->airtime[ac].deficit >= 0)
goto out;
- list_for_each_entry(txqi, &local->active_txqs[ac], schedule_order) {
- if (!txqi->txq.sta)
- continue;
- sta = container_of(txqi->txq.sta, struct sta_info, sta);
- sta->airtime[ac].deficit +=
- (IEEE80211_TXQ_MAY_TX_QUANTUM * sta->airtime_weight);
- }
+ sta->airtime[ac].deficit += sta->airtime_weight;
+ list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
return false;