Introduce sdio tx aggregation to reduce bus transaction ands improve tx throughput. For the moment the skb are copied in a dedicated buffer since mmc APIs do not support sg table for zero-copy. Since skb data are already copied in xmit_buff[], avoid linearization in ma80211 layer. Relying on tx aggregation, we improve tx tpt of ~65%. Tested-by: Sean Wang <sean.wang@xxxxxxxxxxxx> Co-developed-by: Sean Wang <sean.wang@xxxxxxxxxxxx> Signed-off-by: Sean Wang <sean.wang@xxxxxxxxxxxx> Signed-off-by: Lorenzo Bianconi <lorenzo@xxxxxxxxxx> --- drivers/net/wireless/mediatek/mt76/mac80211.c | 5 +- drivers/net/wireless/mediatek/mt76/mt76.h | 3 ++ .../net/wireless/mediatek/mt76/mt7615/sdio.c | 12 ++++- .../wireless/mediatek/mt76/mt7615/sdio_txrx.c | 49 +++++++++++++------ 4 files changed, 50 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 7014846fd854..bd692431b07f 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -307,10 +307,7 @@ mt76_phy_init(struct mt76_dev *dev, struct ieee80211_hw *hw) if (!(dev->drv->drv_flags & MT_DRV_AMSDU_OFFLOAD)) { ieee80211_hw_set(hw, TX_AMSDU); - - /* TODO: avoid linearization for SDIO */ - if (!mt76_is_sdio(dev)) - ieee80211_hw_set(hw, TX_FRAG_LIST); + ieee80211_hw_set(hw, TX_FRAG_LIST); } ieee80211_hw_set(hw, MFP_CAPABLE); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 209d23846d4c..c4a02b5aa990 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -444,6 +444,7 @@ struct mt76_usb { } mcu; }; +#define MT76S_XMIT_BUF_SZ (16 * PAGE_SIZE) struct mt76_sdio { struct workqueue_struct *txrx_wq; struct { @@ -457,6 +458,8 @@ struct mt76_sdio { struct work_struct stat_work; + u8 *xmit_buf[MT_TXQ_MCU_WA]; + struct sdio_func *func; void *intr_data; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c index 8621c6f579aa..874c929d8552 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c @@ -346,7 +346,7 @@ static int mt7663s_probe(struct sdio_func *func, struct ieee80211_ops *ops; struct mt7615_dev *dev; struct mt76_dev *mdev; - int ret; + int i, ret; ops = devm_kmemdup(&func->dev, &mt7615_ops, sizeof(mt7615_ops), GFP_KERNEL); @@ -387,6 +387,16 @@ static int mt7663s_probe(struct sdio_func *func, goto err_deinit; } + for (i = 0; i < ARRAY_SIZE(mdev->sdio.xmit_buf); i++) { + mdev->sdio.xmit_buf[i] = devm_kmalloc(mdev->dev, + MT76S_XMIT_BUF_SZ, + GFP_KERNEL); + if (!mdev->sdio.xmit_buf[i]) { + ret = -ENOMEM; + goto err_deinit; + } + } + ret = mt76s_alloc_queues(&dev->mt76); if (ret) goto err_deinit; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c index 4033fe431312..e82f6bdcbce4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c @@ -138,15 +138,11 @@ static int mt7663s_rx_run_queue(struct mt76_dev *dev, enum mt76_rxq_id qid, return i; } -static int mt7663s_tx_pick_quota(struct mt76_dev *dev, enum mt76_txq_id qid, +static int mt7663s_tx_pick_quota(struct mt76_sdio *sdio, enum mt76_txq_id qid, int buf_sz, int *pse_size, int *ple_size) { - struct mt76_sdio *sdio = &dev->sdio; int pse_sz; - if (!test_bit(MT76_STATE_MCU_RUNNING, &dev->phy.state)) - return 0; - pse_sz = DIV_ROUND_UP(buf_sz + sdio->sched.deficit, MT_PSE_PAGE_SZ); if (qid == MT_TXQ_MCU) { @@ -197,27 +193,52 @@ static int __mt7663s_xmit_queue(struct mt76_dev *dev, u8 *data, int len) static int mt7663s_tx_run_queue(struct mt76_dev *dev, enum mt76_txq_id qid) { - int nframes = 0, pse_sz = 0, ple_sz = 0; + int err, nframes = 0, len = 0, pse_sz = 0, ple_sz = 0; struct mt76_queue *q = dev->q_tx[qid]; struct mt76_sdio *sdio = &dev->sdio; while (q->first != q->head) { struct mt76_queue_entry *e = &q->entry[q->first]; - int err; + struct sk_buff *iter; + + if (!test_bit(MT76_STATE_MCU_RUNNING, &dev->phy.state)) { + __skb_put_zero(e->skb, 4); + err = __mt7663s_xmit_queue(dev, e->skb->data, + e->skb->len); + if (err) + return err; + + goto next; + } + + if (len + e->skb->len + 4 > MT76S_XMIT_BUF_SZ) + break; - if (mt7663s_tx_pick_quota(dev, qid, e->buf_sz, &pse_sz, + if (mt7663s_tx_pick_quota(sdio, qid, e->buf_sz, &pse_sz, &ple_sz)) break; - __skb_put_zero(e->skb, 4); + memcpy(sdio->xmit_buf[qid] + len, e->skb->data, + skb_headlen(e->skb)); + len += skb_headlen(e->skb); + nframes++; - err = __mt7663s_xmit_queue(dev, e->skb->data, e->skb->len); + skb_walk_frags(e->skb, iter) { + memcpy(sdio->xmit_buf[qid] + len, iter->data, + iter->len); + len += iter->len; + nframes++; + } +next: + q->first = (q->first + 1) % q->ndesc; + e->done = true; + } + + if (nframes) { + memset(sdio->xmit_buf[qid] + len, 0, 4); + err = __mt7663s_xmit_queue(dev, sdio->xmit_buf[qid], len + 4); if (err) return err; - - e->done = true; - q->first = (q->first + 1) % q->ndesc; - nframes++; } mt7663s_tx_update_quota(sdio, qid, pse_sz, ple_sz); -- 2.26.2