On 21 January 2016 at 18:40, Peter Oh <poh@xxxxxxxxxxxxxx> wrote: > > On 01/21/2016 05:46 AM, Michal Kazior wrote: >> >> The current/old tx path design was that host, at >> its own leisure, pushed tx frames to the device. >> For HTT there was ~1000-1400 msdu queue depth. >> >> After reaching that limit the driver would request >> mac80211 to stop queues. There was little control >> over what packets got in there as far as >> DA/RA was considered so it was rather easy to >> starve per-station traffic flows. >> >> With MU-MIMO this became a significant problem >> because the queue depth was insufficient to buffer >> frames from multiple clients (which could have >> different signal quality and capabilities) in an >> efficient fashion. >> >> Hence the new tx path in 10.4 was introduced: a >> pull-push mode. >> >> Firmware and host can share tx queue state via >> DMA. The state is logically a 2 dimensional array >> addressed via peer_id+tid pair. Each entry is a >> counter (either number of bytes or packets). Host >> keeps it updated and firmware uses it for >> scheduling Tx pull requests to host. >> >> This allows MU-MIMO to become a lot more effective >> with 10+ clients. 
>> >> Signed-off-by: Michal Kazior <michal.kazior@xxxxxxxxx> >> --- >> drivers/net/wireless/ath/ath10k/core.h | 1 + >> drivers/net/wireless/ath/ath10k/htt.h | 6 ++ >> drivers/net/wireless/ath/ath10k/htt_rx.c | 117 >> ++++++++++++++++++++++++++++--- >> drivers/net/wireless/ath/ath10k/htt_tx.c | 39 ++++++++--- >> drivers/net/wireless/ath/ath10k/mac.c | 48 +++++++++++-- >> drivers/net/wireless/ath/ath10k/mac.h | 5 ++ >> 6 files changed, 192 insertions(+), 24 deletions(-) >> >> diff --git a/drivers/net/wireless/ath/ath10k/core.h >> b/drivers/net/wireless/ath/ath10k/core.h >> index f51887c6be74..ab8cbccc0f4b 100644 >> --- a/drivers/net/wireless/ath/ath10k/core.h >> +++ b/drivers/net/wireless/ath/ath10k/core.h >> @@ -307,6 +307,7 @@ struct ath10k_peer { >> struct ath10k_txq { >> unsigned long num_fw_queued; >> + unsigned long num_push_allowed; >> }; >> struct ath10k_sta { >> diff --git a/drivers/net/wireless/ath/ath10k/htt.h >> b/drivers/net/wireless/ath/ath10k/htt.h >> index b1e40f44e76b..02cf55d306e8 100644 >> --- a/drivers/net/wireless/ath/ath10k/htt.h >> +++ b/drivers/net/wireless/ath/ath10k/htt.h >> @@ -1652,6 +1652,7 @@ struct ath10k_htt { >> struct sk_buff_head tx_compl_q; >> struct sk_buff_head rx_compl_q; >> struct sk_buff_head rx_in_ord_compl_q; >> + struct sk_buff_head tx_fetch_ind_q; >> /* rx_status template */ >> struct ieee80211_rx_status rx_status; >> @@ -1670,8 +1671,10 @@ struct ath10k_htt { >> bool enabled; >> struct htt_q_state *vaddr; >> dma_addr_t paddr; >> + u16 num_push_allowed; >> u16 num_peers; >> u16 num_tids; >> + enum htt_tx_mode_switch_mode mode; >> enum htt_q_depth_type type; >> } tx_q_state; >> }; >> @@ -1761,6 +1764,9 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar, >> void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw, >> struct ieee80211_txq *txq); >> +void ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw, >> + struct ieee80211_txq *txq); >> +void ath10k_htt_tx_txq_sync(struct ath10k *ar); >> void ath10k_htt_tx_dec_pending(struct 
ath10k_htt *htt, >> bool is_mgmt); >> int ath10k_htt_tx_inc_pending(struct ath10k_htt *htt, >> diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c >> b/drivers/net/wireless/ath/ath10k/htt_rx.c >> index 6e3d95c95568..827c8369b589 100644 >> --- a/drivers/net/wireless/ath/ath10k/htt_rx.c >> +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c >> @@ -229,6 +229,7 @@ void ath10k_htt_rx_free(struct ath10k_htt *htt) >> skb_queue_purge(&htt->tx_compl_q); >> skb_queue_purge(&htt->rx_compl_q); >> skb_queue_purge(&htt->rx_in_ord_compl_q); >> + skb_queue_purge(&htt->tx_fetch_ind_q); >> ath10k_htt_rx_ring_free(htt); >> @@ -569,6 +570,7 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt) >> skb_queue_head_init(&htt->tx_compl_q); >> skb_queue_head_init(&htt->rx_compl_q); >> skb_queue_head_init(&htt->rx_in_ord_compl_q); >> + skb_queue_head_init(&htt->tx_fetch_ind_q); >> tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task, >> (unsigned long)htt); >> @@ -2004,16 +2006,21 @@ static void >> ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar, >> static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct >> sk_buff >> *skb) >> { >> + struct ieee80211_hw *hw = ar->hw; >> + struct ieee80211_txq *txq; >> struct htt_resp *resp = (struct htt_resp *)skb->data; >> struct htt_tx_fetch_record *record; >> size_t len; >> size_t max_num_bytes; >> size_t max_num_msdus; >> + size_t num_bytes; >> + size_t num_msdus; >> const __le32 *resp_ids; >> u16 num_records; >> u16 num_resp_ids; >> u16 peer_id; >> u8 tid; >> + int ret; >> int i; >> ath10k_dbg(ar, ATH10K_DBG_HTT, "htt rx tx fetch ind\n"); >> @@ -2039,7 +2046,17 @@ static void ath10k_htt_rx_tx_fetch_ind(struct >> ath10k *ar, struct sk_buff *skb) >> num_records, num_resp_ids, >> le16_to_cpu(resp->tx_fetch_ind.fetch_seq_num)); >> - /* TODO: runtime sanity checks */ >> + if (!ar->htt.tx_q_state.enabled) { >> + ath10k_warn(ar, "received unexpected tx_fetch_ind event: >> not enabled\n"); >> + return; >> + } >> + >> + if 
(ar->htt.tx_q_state.mode == HTT_TX_MODE_SWITCH_PUSH) { >> + ath10k_warn(ar, "received unexpected tx_fetch_ind event: >> in push mode\n"); >> + return; >> + } >> + >> + rcu_read_lock(); >> for (i = 0; i < num_records; i++) { >> record = &resp->tx_fetch_ind.records[i]; >> @@ -2060,13 +2077,56 @@ static void ath10k_htt_rx_tx_fetch_ind(struct >> ath10k *ar, struct sk_buff *skb) >> continue; >> } >> - /* TODO: dequeue and submit tx to device */ >> + spin_lock_bh(&ar->data_lock); >> + txq = ath10k_mac_txq_lookup(ar, peer_id, tid); >> + spin_unlock_bh(&ar->data_lock); >> + >> + /* It is okay to release the lock and use txq because RCU >> read >> + * lock is held. >> + */ >> + >> + if (unlikely(!txq)) { >> + ath10k_warn(ar, "failed to lookup txq for peer_id >> %hu tid %hhu\n", >> + peer_id, tid); >> + continue; >> + } >> + >> + num_msdus = 0; >> + num_bytes = 0; >> + >> + while (num_msdus < max_num_msdus && >> + num_bytes < max_num_bytes) { >> + ret = ath10k_mac_tx_push_txq(hw, txq); >> + if (ret < 0) >> + break; >> + >> + num_msdus++; >> + num_bytes += ret; >> + } >> + >> + record->num_msdus = cpu_to_le16(num_msdus); >> + record->num_bytes = cpu_to_le32(num_bytes); >> + >> + ath10k_htt_tx_txq_recalc(hw, txq); >> } >> + rcu_read_unlock(); >> + >> resp_ids = >> ath10k_htt_get_tx_fetch_ind_resp_ids(&resp->tx_fetch_ind); >> ath10k_htt_rx_tx_fetch_resp_id_confirm(ar, resp_ids, >> num_resp_ids); >> - /* TODO: generate and send fetch response to device */ >> + ret = ath10k_htt_tx_fetch_resp(ar, >> + resp->tx_fetch_ind.token, >> + resp->tx_fetch_ind.fetch_seq_num, >> + resp->tx_fetch_ind.records, >> + num_records); >> + if (unlikely(ret)) { >> + ath10k_warn(ar, "failed to submit tx fetch resp for token >> 0x%08x: %d\n", >> + le32_to_cpu(resp->tx_fetch_ind.token), ret); >> + /* FIXME: request fw restart */ >> + } >> + >> + ath10k_htt_tx_txq_sync(ar); >> } >> static void ath10k_htt_rx_tx_fetch_confirm(struct ath10k *ar, >> @@ -2102,6 +2162,8 @@ static void 
ath10k_htt_rx_tx_mode_switch_ind(struct >> ath10k *ar, >> { >> const struct htt_resp *resp = (void *)skb->data; >> const struct htt_tx_mode_switch_record *record; >> + struct ieee80211_txq *txq; >> + struct ath10k_txq *artxq; >> size_t len; >> size_t num_records; >> enum htt_tx_mode_switch_mode mode; >> @@ -2153,7 +2215,11 @@ static void ath10k_htt_rx_tx_mode_switch_ind(struct >> ath10k *ar, >> if (!enable) >> return; >> - /* TODO: apply configuration */ >> + ar->htt.tx_q_state.enabled = enable; >> + ar->htt.tx_q_state.mode = mode; >> + ar->htt.tx_q_state.num_push_allowed = threshold; >> + >> + rcu_read_lock(); >> for (i = 0; i < num_records; i++) { >> record = &resp->tx_mode_switch_ind.records[i]; >> @@ -2168,10 +2234,33 @@ static void >> ath10k_htt_rx_tx_mode_switch_ind(struct ath10k *ar, >> continue; >> } >> - /* TODO: apply configuration */ >> + spin_lock_bh(&ar->data_lock); >> + txq = ath10k_mac_txq_lookup(ar, peer_id, tid); >> + spin_unlock_bh(&ar->data_lock); >> + >> + /* It is okay to release the lock and use txq because RCU >> read >> + * lock is held. >> + */ >> + >> + if (unlikely(!txq)) { >> + ath10k_warn(ar, "failed to lookup txq for peer_id >> %hu tid %hhu\n", >> + peer_id, tid); >> + continue; >> + } >> + >> + spin_lock_bh(&ar->htt.tx_lock); >> + artxq = (void *)txq->drv_priv; >> + artxq->num_push_allowed = >> le16_to_cpu(record->num_max_msdus); >> + spin_unlock_bh(&ar->htt.tx_lock); >> } >> - /* TODO: apply configuration */ >> + rcu_read_unlock(); >> + >> + spin_lock_bh(&ar->htt.tx_lock); >> + ath10k_mac_tx_lock(ar, ATH10K_TX_PAUSE_Q_FLUSH_PENDING); >> + spin_unlock_bh(&ar->htt.tx_lock); >> + > > Isn't it proved it break Mesh from working? Yes, good point. I'm still working on this - I should've mentioned that in the cover letter. Nonetheless I wanted to get the review process going for this patchset. I'll address the mesh problem in v2. 
Michał -- To unsubscribe from this list: send the line "unsubscribe linux-wireless" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html