Re: [PATCH v5 01/11] mt76: add hash lookup for skb on TXS status_list

Felix Fietkau <nbd@xxxxxxxx> · Fri, 13 Aug 2021 18:50:53 +0200

On 2021-08-04 15:44, greearb@xxxxxxxxxxxxxxx wrote:
> From: Ben Greear <greearb@xxxxxxxxxxxxxxx>
> 
> This improves performance when sending lots of frames that
> are requesting being mapped to a TXS callback.
> 
> Add comments to help next person understood the tx path
> better.
> 
> Signed-off-by: Ben Greear <greearb@xxxxxxxxxxxxxxx>
> ---
> 
> v5:  Rebased on top of previous series.
> 
>  drivers/net/wireless/mediatek/mt76/mt76.h     | 48 +++++++---
>  .../net/wireless/mediatek/mt76/mt7603/mac.c   |  2 +-
>  .../net/wireless/mediatek/mt76/mt7615/mac.c   |  2 +-
>  .../net/wireless/mediatek/mt76/mt76x02_mac.c  |  2 +-
>  .../net/wireless/mediatek/mt76/mt7915/mac.c   |  8 +-
>  .../net/wireless/mediatek/mt76/mt7921/mac.c   |  9 +-
>  drivers/net/wireless/mediatek/mt76/tx.c       | 90 ++++++++++++++++---
>  7 files changed, 132 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> index 436bf2b8e2cd..016f563fec39 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> @@ -235,6 +235,14 @@ DECLARE_EWMA(signal, 10, 8);
>  #define MT_WCID_TX_INFO_TXPWR_ADJ	GENMASK(25, 18)
>  #define MT_WCID_TX_INFO_SET		BIT(31)
>  
> +#define MT_PACKET_ID_MASK		GENMASK(6, 0)
> +#define MT_PACKET_ID_NO_ACK		0
> +/* Request TXS, but don't try to match with skb. */
> +#define MT_PACKET_ID_NO_SKB		1
> +#define MT_PACKET_ID_FIRST		2
> +#define MT_PACKET_ID_HAS_RATE		BIT(7)
> +#define MT_PACKET_ID_MAX		(GENMASK(7, 0) - 1)
> +
>  struct mt76_wcid {
>  	struct mt76_rx_tid __rcu *aggr[IEEE80211_NUM_TIDS];
>  
> @@ -246,6 +254,8 @@ struct mt76_wcid {
>  
>  	struct rate_info rate;
>  
> +	struct sk_buff *skb_status_array[MT_PACKET_ID_MAX + 1];
You could add this to reduce the struct size:
#define MT_NUM_STATUS_PACKETS \
	(MT_PACKET_ID_MAX + 1 - MT_PACKET_ID_FIRST)

And then subtract MT_PACKET_ID_FIRST from cache entries.

>  	u16 idx;
>  	u8 hw_key_idx;
>  	u8 hw_key_idx2;
> @@ -302,13 +312,8 @@ struct mt76_rx_tid {
>  #define MT_TX_CB_TXS_DONE		BIT(1)
>  #define MT_TX_CB_TXS_FAILED		BIT(2)
>  
> -#define MT_PACKET_ID_MASK		GENMASK(6, 0)
> -#define MT_PACKET_ID_NO_ACK		0
> -#define MT_PACKET_ID_NO_SKB		1
> -#define MT_PACKET_ID_FIRST		2
> -#define MT_PACKET_ID_HAS_RATE		BIT(7)
> -
> -#define MT_TX_STATUS_SKB_TIMEOUT	HZ
> +/* This is timer for when to give up when waiting for TXS callback. */
> +#define MT_TX_STATUS_SKB_TIMEOUT	(HZ / 8)
I think the way timeouts are checked now, HZ/8 is way too short.
I would recommend checking timeout only for packets where
MT_TX_CB_DMA_DONE is already set, and setting cb->jiffies from within
__mt76_tx_status_skb_done on DMA completion. That should make it
possible to keep the timeout short without running into it in cases
where significant congestion adds huge completion latency.

> @@ -1297,13 +1303,33 @@ mt76_token_put(struct mt76_dev *dev, int token)
>  }
>  
>  static inline int
> -mt76_get_next_pkt_id(struct mt76_wcid *wcid)
> +mt76_get_next_pkt_id(struct mt76_dev *dev, struct mt76_wcid *wcid,
> +		     struct sk_buff *skb)
>  {
> +	struct sk_buff *qskb;
> +
> +	lockdep_assert_held(&dev->status_list.lock);
> +
>  	wcid->packet_id = (wcid->packet_id + 1) & MT_PACKET_ID_MASK;
> -	if (wcid->packet_id == MT_PACKET_ID_NO_ACK ||
> -	    wcid->packet_id == MT_PACKET_ID_NO_SKB)
> +	if (wcid->packet_id < MT_PACKET_ID_FIRST)
>  		wcid->packet_id = MT_PACKET_ID_FIRST;
>  
> +	qskb = wcid->skb_status_array[wcid->packet_id];
> +	if (qskb) {
> +		/* bummer, already waiting on this pid.  See if it is stale. */
> +		struct mt76_tx_cb *cb = mt76_tx_skb_cb(qskb);
> +
> +		if (!time_after(jiffies, cb->jiffies + MT_TX_STATUS_SKB_TIMEOUT)) {
> +			/* ok, not stale.  Increment pid anyway, will try next
> +			 * slot next time
> +			 */
> +			return MT_PACKET_ID_NO_SKB;
> +		}
> +	}
> +
> +	/* cache this skb for fast lookup by packet-id */
> +	wcid->skb_status_array[wcid->packet_id] = skb;
> +
I think mt76_get_next_pkt_id is not a good place for caching the skb.
Better cache it in the same place that also puts the skb in the status
list: mt76_tx_status_skb_add

That way you can drop your (possibly broken) changes to mt7921, which
calls mt76_get_next_pkt_id directly, but does not support tx status
tracking for skbs.

> diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
> index d9f52e2611a7..8f5702981900 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
> +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
> @@ -1318,6 +1318,8 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
>  
>  	mt76_tx_status_lock(mdev, &list);
>  	skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);
> +
> +	/* TODO:  Gather stats anyway, even if we are not matching on an skb. */
Please drop this comment, since you're deleting in another patch in this
series anyway.

> @@ -1417,10 +1419,14 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
>  		stats->tx_bw[0]++;
>  		break;
>  	}
> +
> +	/* Cache rate for packets that don't get a TXS callback for some
> +	 * reason.
> +	 */
>  	wcid->rate = rate;
That comment is wrong, wcid->rate is cached because HE rates can't be
reported via skb->cb due to lack of space.

> diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
> index 6f302acb6e69..4c8504d3c904 100644
> --- a/drivers/net/wireless/mediatek/mt76/tx.c
> +++ b/drivers/net/wireless/mediatek/mt76/tx.c
> @@ -130,15 +154,30 @@ mt76_tx_status_skb_add(struct mt76_dev *dev, struct mt76_wcid *wcid,
>  			     IEEE80211_TX_CTL_RATE_CTRL_PROBE)))
>  		return MT_PACKET_ID_NO_SKB;
>  
> +	/* due to limited range of the pktid (7 bits), we can only
> +	 * have a limited number of outstanding frames.  I think it is OK to
> +	 * check the length outside of a lock since it doesn't matter too much
> +	 * if we read wrong data here.
> +	 * The TX-status callbacks don't always return a callback for an SKB,
> +	 * so the status_list may contain some stale skbs.  Those will be cleaned
> +	 * out periodically, see MT_TX_STATUS_SKB_TIMEOUT.
> +	 */
> +
> +	qlen = skb_queue_len(&dev->status_list);
> +	if (qlen > 120)
> +		return MT_PACKET_ID_NO_SKB;
Checking the length of the per-device status list doesn't make sense,
since pktid allocation is per-wcid.

- Felix