On Sun, Jul 14, 2024, at 10:38 PM, Baochen Qiang wrote: > In transmit path, it is likely that the iova is not aligned to PCIe TLP > max payload size, which is 128 for WCN7850. Normally in such cases hardware > is expected to split the packet into several parts in a manner such that > they, other than the first one, have aligned iova. However due to hardware > limitations, WCN7850 does not behave like that properly with some specific > unaligned iova in transmit path. This easily results in target hang in a > KPI transmit test: packet send/receive failure, WMI command send timeout > etc. Also fatal error seen in PCIe level: > > ... > Capabilities: ... > ... > DevSta: ... FatalErr+ ... > ... > ... > > Work around this by manually moving/reallocating payload buffer such that > we can map it to a 128 bytes aligned iova. The moving requires sufficient > head room or tail room in skb: for the former we can do ourselves a favor > by asking some extra bytes when registering with mac80211, while for the > latter we can do nothing. > > Moving/reallocating buffer consumes additional CPU cycles, but the good news > is that an aligned iova increases PCIe efficiency. In my tests on some X86 > platforms the KPI results are almost consistent. > > Since this is seen only with WCN7850, add a new hardware parameter to > differentiate from others. > > Tested-on: WCN7850 hw2.0 PCI > WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 > > Signed-off-by: Baochen Qiang <quic_bqiang@xxxxxxxxxxx> > --- > drivers/net/wireless/ath/ath12k/dp_tx.c | 72 +++++++++++++++++++++++++ > drivers/net/wireless/ath/ath12k/hw.c | 6 +++ > drivers/net/wireless/ath/ath12k/hw.h | 4 ++ > drivers/net/wireless/ath/ath12k/mac.c | 1 + > 4 files changed, 83 insertions(+) > > > base-commit: db1ce56e6e1d395dd42a3cd6332a871d9be59c45 > > diff --git a/drivers/net/wireless/ath/ath12k/dp_tx.c > b/drivers/net/wireless/ath/ath12k/dp_tx.c > index d08c04343e90..00475d0848e1 100644 > --- a/drivers/net/wireless/ath/ath12k/dp_tx.c > +++ b/drivers/net/wireless/ath/ath12k/dp_tx.c > @@ -162,6 +162,60 @@ static int ath12k_dp_prepare_htt_metadata(struct > sk_buff *skb) > return 0; > } > > +static void ath12k_dp_tx_move_payload(struct sk_buff *skb, > + unsigned long delta, > + bool head) > +{ > + unsigned long len = skb->len; > + > + if (head) { > + skb_push(skb, delta); > + memmove(skb->data, skb->data + delta, len); > + skb_trim(skb, len); > + } else { > + skb_put(skb, delta); > + memmove(skb->data + delta, skb->data, len); > + skb_pull(skb, delta); > + } > +} > + > +static int ath12k_dp_tx_align_payload(struct ath12k_base *ab, > + struct sk_buff **pskb) > +{ > + u32 iova_mask = ab->hw_params->iova_mask; > + unsigned long offset, delta1, delta2; > + struct sk_buff *skb2, *skb = *pskb; > + unsigned int headroom = skb_headroom(skb); > + int tailroom = skb_tailroom(skb); > + int ret = 0; > + > + offset = (unsigned long)skb->data & iova_mask; > + delta1 = offset; > + delta2 = iova_mask - offset + 1; > + > + if (headroom >= delta1) { > + ath12k_dp_tx_move_payload(skb, delta1, true); > + } else if (tailroom >= delta2) { > + ath12k_dp_tx_move_payload(skb, delta2, false); > + } else { > + skb2 = skb_realloc_headroom(skb, iova_mask); > + if (!skb2) { > + ret = -ENOMEM; > + goto out; > + } > + > + dev_kfree_skb_any(skb); > + > + offset = (unsigned long)skb2->data & iova_mask; > + if (offset) > + ath12k_dp_tx_move_payload(skb2, offset, true); > + *pskb = skb2; > + } > + > +out: > + return ret; > +} > + > int ath12k_dp_tx(struct ath12k *ar, struct ath12k_vif *arvif, > struct sk_buff *skb) > { > @@ -184,6 +238,7 @@ int ath12k_dp_tx(struct ath12k *ar, struct > ath12k_vif *arvif, > bool tcl_ring_retry; > bool msdu_ext_desc = false; > bool add_htt_metadata = false; > + u32 iova_mask = ab->hw_params->iova_mask; > > if (test_bit(ATH12K_FLAG_CRASH_FLUSH, &ar->ab->dev_flags)) > return -ESHUTDOWN; > @@ -279,6 +334,23 @@ int ath12k_dp_tx(struct ath12k *ar, struct > ath12k_vif *arvif, > goto fail_remove_tx_buf; > } > > + if (iova_mask && > + (unsigned long)skb->data & iova_mask) { > + ret = ath12k_dp_tx_align_payload(ab, &skb); > + if (ret) { > + dev_warn_once(ab->dev, "failed to align TX buffer %d\n", ret); > + /* don't bail out, give original buffer > + * a chance even unaligned. > + */ > + goto map; > + } > + > + /* hdr is pointing to a wrong place after alignment, > + * so refresh it for later use. > + */ > + hdr = (void *)skb->data; > + } > +map: > ti.paddr = dma_map_single(ab->dev, skb->data, skb->len, > DMA_TO_DEVICE); > if (dma_mapping_error(ab->dev, ti.paddr)) { > atomic_inc(&ab->soc_stats.tx_err.misc_fail); > diff --git a/drivers/net/wireless/ath/ath12k/hw.c > b/drivers/net/wireless/ath/ath12k/hw.c > index 2e11ea763574..7b3e2420e3c5 100644 > --- a/drivers/net/wireless/ath/ath12k/hw.c > +++ b/drivers/net/wireless/ath/ath12k/hw.c > @@ -924,6 +924,8 @@ static const struct ath12k_hw_params > ath12k_hw_params[] = { > > .acpi_guid = NULL, > .supports_dynamic_smps_6ghz = true, > + > + .iova_mask = 0, > }, > { > .name = "wcn7850 hw2.0", > @@ -1000,6 +1002,8 @@ static const struct ath12k_hw_params > ath12k_hw_params[] = { > > .acpi_guid = &wcn7850_uuid, > .supports_dynamic_smps_6ghz = false, > + > + .iova_mask = PCIE_MAX_PAYLOAD_SIZE - 1, > }, > { > .name = "qcn9274 hw2.0", > @@ -1072,6 +1076,8 @@ static const struct ath12k_hw_params > ath12k_hw_params[] = { > > .acpi_guid = NULL, > .supports_dynamic_smps_6ghz = true, > + > + .iova_mask = 0, > }, > }; > > diff --git a/drivers/net/wireless/ath/ath12k/hw.h > b/drivers/net/wireless/ath/ath12k/hw.h > index e792eb6b249b..49668aa0efc8 100644 > --- a/drivers/net/wireless/ath/ath12k/hw.h > +++ b/drivers/net/wireless/ath/ath12k/hw.h > @@ -96,6 +96,8 @@ > #define ATH12K_M3_FILE "m3.bin" > #define ATH12K_REGDB_FILE_NAME "regdb.bin" > > +#define PCIE_MAX_PAYLOAD_SIZE 128 > + > enum ath12k_hw_rate_cck { > ATH12K_HW_RATE_CCK_LP_11M = 0, > ATH12K_HW_RATE_CCK_LP_5_5M, > @@ -215,6 +217,8 @@ struct ath12k_hw_params { > > const guid_t *acpi_guid; > bool supports_dynamic_smps_6ghz; > + > + u32 iova_mask; > }; > > struct ath12k_hw_ops { > diff --git a/drivers/net/wireless/ath/ath12k/mac.c > b/drivers/net/wireless/ath/ath12k/mac.c > index 8106297f0bc1..ce41c8153080 100644 > --- a/drivers/net/wireless/ath/ath12k/mac.c > +++ b/drivers/net/wireless/ath/ath12k/mac.c > @@ -9193,6 +9193,7 @@ static int ath12k_mac_hw_register(struct > ath12k_hw *ah) > > hw->vif_data_size = sizeof(struct ath12k_vif); > hw->sta_data_size = sizeof(struct ath12k_sta); > + hw->extra_tx_headroom = ab->hw_params->iova_mask; > > wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); > wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_STA_TX_PWR); We've tested this in the Lenovo lab using the T14 G5 AMD with a 6.10.0-rc7+ kernel from wireless-next and this patch applied. Previously we had stability issues under traffic load. With the patch applied we can no longer reproduce the issue. Tested-by: Mark Pearson <mpearson-lenovo@xxxxxxxxx> Can this be tagged for stable backporting? It's an important fix. Thanks for getting this fix done. Very much appreciated. Mark