On Sun, Jan 19, 2025 at 8:42 AM P Praneesh <quic_ppranees@xxxxxxxxxxx> wrote:
>
> Currently, the driver allocates cacheable DMA buffers for the rx_tid
> structure using kzalloc() and dma_map_single(). These buffers are
> long-lived and can persist for the lifetime of the peer, which is not
> advisable. Instead of using kzalloc() and dma_map_single() for allocating
> cacheable DMA buffers, utilize the dma_alloc_noncoherent() helper for the
> allocation of long-lived cacheable DMA buffers, such as the peer's rx_tid.
> Since dma_alloc_noncoherent() returns unaligned physical and virtual
> addresses, align them internally before use within the driver. This
> ensures proper allocation of non-coherent memory through the kernel
> helper.
>
> Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1
> Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3
>
> Signed-off-by: P Praneesh <quic_ppranees@xxxxxxxxxxx>
> ---
>  drivers/net/wireless/ath/ath11k/dp.h    |   6 +-
>  drivers/net/wireless/ath/ath11k/dp_rx.c | 117 +++++++++++-------------
>  2 files changed, 58 insertions(+), 65 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/ath11k/dp.h b/drivers/net/wireless/ath/ath11k/dp.h
> index f777314db8b3..7a55afd33be8 100644
> --- a/drivers/net/wireless/ath/ath11k/dp.h
> +++ b/drivers/net/wireless/ath/ath11k/dp.h
> @@ -1,7 +1,7 @@
>  /* SPDX-License-Identifier: BSD-3-Clause-Clear */
>  /*
>   * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
> - * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
> + * Copyright (c) 2021-2023, 2025 Qualcomm Innovation Center, Inc. All rights reserved.
>   */
>
>  #ifndef ATH11K_DP_H
> @@ -20,7 +20,6 @@ struct ath11k_ext_irq_grp;
>
>  struct dp_rx_tid {
>          u8 tid;
> -        u32 *vaddr;
>          dma_addr_t paddr;
>          u32 size;
>          u32 ba_win_sz;
> @@ -37,6 +36,9 @@ struct dp_rx_tid {
>          /* Timer info related to fragments */
>          struct timer_list frag_timer;
>          struct ath11k_base *ab;
> +        u32 *vaddr_unaligned;
> +        dma_addr_t paddr_unaligned;
> +        u32 unaligned_size;
>  };
>
>  #define DP_REO_DESC_FREE_THRESHOLD 64
> diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
> index 029ecf51c9ef..5e71e5d9ecb7 100644
> --- a/drivers/net/wireless/ath/ath11k/dp_rx.c
> +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
> @@ -1,7 +1,7 @@
>  // SPDX-License-Identifier: BSD-3-Clause-Clear
>  /*
>   * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
> - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
> + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved.
>   */
>
>  #include <linux/ieee80211.h>
> @@ -675,11 +675,11 @@ void ath11k_dp_reo_cmd_list_cleanup(struct ath11k_base *ab)
>          list_for_each_entry_safe(cmd, tmp, &dp->reo_cmd_list, list) {
>                  list_del(&cmd->list);
>                  rx_tid = &cmd->data;
> -                if (rx_tid->vaddr) {
> -                        dma_unmap_single(ab->dev, rx_tid->paddr,
> -                                         rx_tid->size, DMA_BIDIRECTIONAL);
> -                        kfree(rx_tid->vaddr);
> -                        rx_tid->vaddr = NULL;
> +                if (rx_tid->vaddr_unaligned) {
> +                        dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
> +                                             rx_tid->vaddr_unaligned,
> +                                             rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
> +                        rx_tid->vaddr_unaligned = NULL;
>                  }
>                  kfree(cmd);
>          }
> @@ -689,11 +689,11 @@ void ath11k_dp_reo_cmd_list_cleanup(struct ath11k_base *ab)
>                  list_del(&cmd_cache->list);
>                  dp->reo_cmd_cache_flush_count--;
>                  rx_tid = &cmd_cache->data;
> -                if (rx_tid->vaddr) {
> -                        dma_unmap_single(ab->dev, rx_tid->paddr,
> -                                         rx_tid->size, DMA_BIDIRECTIONAL);
> -                        kfree(rx_tid->vaddr);
> -                        rx_tid->vaddr = NULL;
> +                if (rx_tid->vaddr_unaligned) {
> +                        dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
> +                                             rx_tid->vaddr_unaligned,
> +                                             rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
> +                        rx_tid->vaddr_unaligned = NULL;
>                  }
>                  kfree(cmd_cache);
>          }
> @@ -708,11 +708,11 @@ static void ath11k_dp_reo_cmd_free(struct ath11k_dp *dp, void *ctx,
>          if (status != HAL_REO_CMD_SUCCESS)
>                  ath11k_warn(dp->ab, "failed to flush rx tid hw desc, tid %d status %d\n",
>                              rx_tid->tid, status);
> -        if (rx_tid->vaddr) {
> -                dma_unmap_single(dp->ab->dev, rx_tid->paddr, rx_tid->size,
> -                                 DMA_BIDIRECTIONAL);
> -                kfree(rx_tid->vaddr);
> -                rx_tid->vaddr = NULL;
> +        if (rx_tid->vaddr_unaligned) {
> +                dma_free_noncoherent(dp->ab->dev, rx_tid->unaligned_size,
> +                                     rx_tid->vaddr_unaligned,
> +                                     rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
> +                rx_tid->vaddr_unaligned = NULL;
>          }
>  }
>
> @@ -749,10 +749,10 @@ static void ath11k_dp_reo_cache_flush(struct ath11k_base *ab,
>          if (ret) {
>                  ath11k_err(ab, "failed to send HAL_REO_CMD_FLUSH_CACHE cmd, tid %d (%d)\n",
>                             rx_tid->tid, ret);
> -                dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
> -                                 DMA_BIDIRECTIONAL);
> -                kfree(rx_tid->vaddr);
> -                rx_tid->vaddr = NULL;
> +                dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
> +                                     rx_tid->vaddr_unaligned,
> +                                     rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
> +                rx_tid->vaddr_unaligned = NULL;
>          }
>  }
>
> @@ -802,10 +802,10 @@ static void ath11k_dp_rx_tid_del_func(struct ath11k_dp *dp, void *ctx,
>
>          return;
>  free_desc:
> -        dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
> -                         DMA_BIDIRECTIONAL);
> -        kfree(rx_tid->vaddr);
> -        rx_tid->vaddr = NULL;
> +        dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
> +                             rx_tid->vaddr_unaligned,
> +                             rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
> +        rx_tid->vaddr_unaligned = NULL;
>  }
>
>  void ath11k_peer_rx_tid_delete(struct ath11k *ar,
> @@ -831,14 +831,16 @@ void ath11k_peer_rx_tid_delete(struct ath11k *ar,
>                  if (ret != -ESHUTDOWN)
>                          ath11k_err(ar->ab, "failed to send HAL_REO_CMD_UPDATE_RX_QUEUE cmd, tid %d (%d)\n",
>                                     tid, ret);
> -                dma_unmap_single(ar->ab->dev, rx_tid->paddr, rx_tid->size,
> -                                 DMA_BIDIRECTIONAL);
> -                kfree(rx_tid->vaddr);
> -                rx_tid->vaddr = NULL;
> +                dma_free_noncoherent(ar->ab->dev, rx_tid->unaligned_size,
> +                                     rx_tid->vaddr_unaligned,
> +                                     rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
> +                rx_tid->vaddr_unaligned = NULL;
>          }
>
>          rx_tid->paddr = 0;
> +        rx_tid->paddr_unaligned = 0;
>          rx_tid->size = 0;
> +        rx_tid->unaligned_size = 0;
>  }
>
>  static int ath11k_dp_rx_link_desc_return(struct ath11k_base *ab,
> @@ -982,10 +984,9 @@ static void ath11k_dp_rx_tid_mem_free(struct ath11k_base *ab,
>          if (!rx_tid->active)
>                  goto unlock_exit;
>
> -        dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
> -                         DMA_BIDIRECTIONAL);
> -        kfree(rx_tid->vaddr);
> -        rx_tid->vaddr = NULL;
> +        dma_free_noncoherent(ab->dev, rx_tid->unaligned_size, rx_tid->vaddr_unaligned,
> +                             rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
> +        rx_tid->vaddr_unaligned = NULL;
>
>          rx_tid->active = false;
>
> @@ -1000,9 +1001,8 @@ int ath11k_peer_rx_tid_setup(struct ath11k *ar, const u8 *peer_mac, int vdev_id,
>          struct ath11k_base *ab = ar->ab;
>          struct ath11k_peer *peer;
>          struct dp_rx_tid *rx_tid;
> -        u32 hw_desc_sz;
> -        u32 *addr_aligned;
> -        void *vaddr;
> +        u32 hw_desc_sz, *vaddr;
> +        void *vaddr_unaligned;
>          dma_addr_t paddr;
>          int ret;
>
> @@ -1050,49 +1050,40 @@ int ath11k_peer_rx_tid_setup(struct ath11k *ar, const u8 *peer_mac, int vdev_id,
>          else
>                  hw_desc_sz = ath11k_hal_reo_qdesc_size(DP_BA_WIN_SZ_MAX, tid);
>
> -        vaddr = kzalloc(hw_desc_sz + HAL_LINK_DESC_ALIGN - 1, GFP_ATOMIC);
> -        if (!vaddr) {
> +        rx_tid->unaligned_size = hw_desc_sz + HAL_LINK_DESC_ALIGN - 1;
> +        vaddr_unaligned = dma_alloc_noncoherent(ab->dev, rx_tid->unaligned_size, &paddr,
> +                                                DMA_BIDIRECTIONAL, GFP_ATOMIC);
> +        if (!vaddr_unaligned) {
>                  spin_unlock_bh(&ab->base_lock);
>                  return -ENOMEM;
>          }
>
> -        addr_aligned = PTR_ALIGN(vaddr, HAL_LINK_DESC_ALIGN);
> -
> -        ath11k_hal_reo_qdesc_setup(addr_aligned, tid, ba_win_sz,
> -                                   ssn, pn_type);
> -
> -        paddr = dma_map_single(ab->dev, addr_aligned, hw_desc_sz,
> -                               DMA_BIDIRECTIONAL);
> -
> -        ret = dma_mapping_error(ab->dev, paddr);
> -        if (ret) {
> -                spin_unlock_bh(&ab->base_lock);
> -                ath11k_warn(ab, "failed to setup dma map for peer %pM rx tid %d: %d\n",
> -                            peer_mac, tid, ret);
> -                goto err_mem_free;
> -        }
> -
> -        rx_tid->vaddr = vaddr;
> -        rx_tid->paddr = paddr;
> +        rx_tid->vaddr_unaligned = vaddr_unaligned;
> +        vaddr = PTR_ALIGN(vaddr_unaligned, HAL_LINK_DESC_ALIGN);
> +        rx_tid->paddr_unaligned = paddr;
> +        rx_tid->paddr = rx_tid->paddr_unaligned + ((unsigned long)vaddr -
> +                        (unsigned long)rx_tid->vaddr_unaligned);
> +        ath11k_hal_reo_qdesc_setup(vaddr, tid, ba_win_sz, ssn, pn_type);
>          rx_tid->size = hw_desc_sz;
>          rx_tid->active = true;
>
> +        /* After dma_alloc_noncoherent, vaddr is being modified for reo qdesc setup.
> +         * Since these changes are not reflected in the device, driver now needs to
> +         * explicitly call dma_sync_single_for_device.
> +         */
> +        dma_sync_single_for_device(ab->dev, rx_tid->paddr,
> +                                   rx_tid->size,
> +                                   DMA_TO_DEVICE);
>          spin_unlock_bh(&ab->base_lock);
>
> -        ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac,
> -                                                     paddr, tid, 1, ba_win_sz);
> +        ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac, rx_tid->paddr,
> +                                                     tid, 1, ba_win_sz);
>          if (ret) {
>                  ath11k_warn(ar->ab, "failed to setup rx reorder queue for peer %pM tid %d: %d\n",
>                              peer_mac, tid, ret);
>                  ath11k_dp_rx_tid_mem_free(ab, peer_mac, vdev_id, tid);
>          }
>
> -        return ret;
> -
> -err_mem_free:
> -        kfree(rx_tid->vaddr);
> -        rx_tid->vaddr = NULL;
> -
>          return ret;
>  }
>
> --
> 2.34.1
>

I tested this with qcn9074 on an imx8mm (no iommu) with 4GiB DRAM and
verified swiotlb was not being used and infrastructure mode worked fine.

Tested-By: Tim Harvey <tharvey@xxxxxxxxxxxxx>

Best regards,

Tim
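For readers skimming the diff, the heart of the change is the
over-allocate-then-align pattern: allocate hw_desc_sz plus
HAL_LINK_DESC_ALIGN - 1 bytes with dma_alloc_noncoherent(), align the CPU
pointer, carry the same offset over to the DMA address, and push CPU writes
to the device explicitly because the memory is non-coherent. Below is a
minimal, self-contained sketch of that pattern; the struct and function
names (rx_tid_buf, rx_tid_buf_alloc, rx_tid_buf_free) and the desc_align
parameter are illustrative only and are not part of the driver.

#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/kernel.h>

struct rx_tid_buf {
        void *vaddr_unaligned;          /* as returned by the allocator */
        dma_addr_t paddr_unaligned;
        u32 unaligned_size;
        dma_addr_t paddr;               /* aligned address handed to the hardware */
};

static int rx_tid_buf_alloc(struct device *dev, struct rx_tid_buf *buf,
                            u32 desc_sz, unsigned long desc_align)
{
        void *aligned;

        /* Over-allocate so an aligned descriptor of desc_sz always fits. */
        buf->unaligned_size = desc_sz + desc_align - 1;
        buf->vaddr_unaligned = dma_alloc_noncoherent(dev, buf->unaligned_size,
                                                     &buf->paddr_unaligned,
                                                     DMA_BIDIRECTIONAL, GFP_ATOMIC);
        if (!buf->vaddr_unaligned)
                return -ENOMEM;

        /* Align the CPU pointer and apply the same offset to the DMA address. */
        aligned = PTR_ALIGN(buf->vaddr_unaligned, desc_align);
        buf->paddr = buf->paddr_unaligned + (aligned - buf->vaddr_unaligned);

        /* ... CPU fills in the descriptor at 'aligned' here ... */

        /* Non-coherent memory: make the CPU writes visible to the device. */
        dma_sync_single_for_device(dev, buf->paddr, desc_sz, DMA_TO_DEVICE);
        return 0;
}

static void rx_tid_buf_free(struct device *dev, struct rx_tid_buf *buf)
{
        dma_free_noncoherent(dev, buf->unaligned_size, buf->vaddr_unaligned,
                             buf->paddr_unaligned, DMA_BIDIRECTIONAL);
        buf->vaddr_unaligned = NULL;
}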