On 2021-06-15 14:13, Jouni Malinen wrote:
From: P Praneesh <ppranees@xxxxxxxxxxxxxx>
tcl_data and reo_dst rings are currently being allocated
using dma_alloc_coherent(), which is non-cacheable.
Allocating ring memory from a cacheable memory area
allows cached descriptor access and prefetching of the next
descriptors to optimize CPU usage during
descriptor processing on NAPI.
Tested-on: QCN9074 hw1.0 PCI
WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1
Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01695-QCAHKSWPL_SILICONZ-1
Co-developed-by: Pradeep Kumar Chitrapu <pradeepc@xxxxxxxxxxxxxx>
Signed-off-by: Pradeep Kumar Chitrapu <pradeepc@xxxxxxxxxxxxxx>
Co-developed-by: Sriram R <srirrama@xxxxxxxxxxxxxx>
Signed-off-by: Sriram R <srirrama@xxxxxxxxxxxxxx>
Signed-off-by: P Praneesh <ppranees@xxxxxxxxxxxxxx>
Signed-off-by: Jouni Malinen <jouni@xxxxxxxxxxxxxx>
---
drivers/net/wireless/ath/ath11k/dp.c | 34 +++++++++++++++++++++++----
drivers/net/wireless/ath/ath11k/dp.h | 1 +
drivers/net/wireless/ath/ath11k/hal.c | 25 ++++++++++++++++++--
drivers/net/wireless/ath/ath11k/hal.h | 1 +
4 files changed, 54 insertions(+), 7 deletions(-)
diff --git a/drivers/net/wireless/ath/ath11k/dp.c
b/drivers/net/wireless/ath/ath11k/dp.c
index b0c8f6290099..cf869ebc209a 100644
--- a/drivers/net/wireless/ath/ath11k/dp.c
+++ b/drivers/net/wireless/ath/ath11k/dp.c
@@ -101,8 +101,11 @@ void ath11k_dp_srng_cleanup(struct ath11k_base
*ab,
struct dp_srng *ring)
if (!ring->vaddr_unaligned)
return;
- dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned,
- ring->paddr_unaligned);
+ if (ring->cached)
+ kfree(ring->vaddr_unaligned);
+ else
+ dma_free_coherent(ab->dev, ring->size,
ring->vaddr_unaligned,
+ ring->paddr_unaligned);
ring->vaddr_unaligned = NULL;
}
@@ -222,6 +225,7 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab,
struct dp_srng *ring,
int entry_sz = ath11k_hal_srng_get_entrysize(ab, type);
int max_entries = ath11k_hal_srng_get_max_entries(ab, type);
int ret;
+ bool cached;
if (max_entries < 0 || entry_sz < 0)
return -EINVAL;
@@ -229,10 +233,25 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab,
struct dp_srng *ring,
if (num_entries > max_entries)
num_entries = max_entries;
+ /* Allocate the reo dst and tx completion rings from cacheable
memory */
+ switch (type) {
+ case HAL_REO_DST:
+ cached = true;
+ default:
+ cached = false;
+ }
+
ring->size = (num_entries * entry_sz) + HAL_RING_BASE_ALIGN - 1;
- ring->vaddr_unaligned = dma_alloc_coherent(ab->dev, ring->size,
- &ring->paddr_unaligned,
- GFP_KERNEL);
+
+ if (cached) {
+ ring->vaddr_unaligned = kzalloc(ring->size, GFP_KERNEL);
+ ring->paddr_unaligned = virt_to_phys(ring->vaddr_unaligned);
Internal developers found that this is causing a fault in the rx data path.
Suggested fix:
- ring->paddr_unaligned =
virt_to_phys(ring->vaddr_unaligned);
+ ring->paddr_unaligned = dma_map_single(ab->dev,
ring->vaddr_unaligned,
+ ring->size,
DMA_FROM_DEVICE);
+ } else {
+ ring->vaddr_unaligned = dma_alloc_coherent(ab->dev,
ring->size,
+
&ring->paddr_unaligned,
+ GFP_KERNEL);
+ }
+
if (!ring->vaddr_unaligned)
return -ENOMEM;
@@ -292,6 +311,11 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab,
struct dp_srng *ring,
return -EINVAL;
}
+ if (cached) {
+ params.flags |= HAL_SRNG_FLAGS_CACHED;
+ ring->cached = 1;
+ }
+
ret = ath11k_hal_srng_setup(ab, type, ring_num, mac_id, ¶ms);
if (ret < 0) {
ath11k_warn(ab, "failed to setup srng: %d ring_id %d\n",
diff --git a/drivers/net/wireless/ath/ath11k/dp.h
b/drivers/net/wireless/ath/ath11k/dp.h
index ee768ccce46e..e6591488a28c 100644
--- a/drivers/net/wireless/ath/ath11k/dp.h
+++ b/drivers/net/wireless/ath/ath11k/dp.h
@@ -64,6 +64,7 @@ struct dp_srng {
dma_addr_t paddr;
int size;
u32 ring_id;
+ u8 cached;
};
struct dp_rxdma_ring {
diff --git a/drivers/net/wireless/ath/ath11k/hal.c
b/drivers/net/wireless/ath/ath11k/hal.c
index eaa0edca5576..a58e86e42b5b 100644
--- a/drivers/net/wireless/ath/ath11k/hal.c
+++ b/drivers/net/wireless/ath/ath11k/hal.c
@@ -627,6 +627,21 @@ u32 *ath11k_hal_srng_dst_peek(struct ath11k_base
*ab,
struct hal_srng *srng)
return NULL;
}
+static void ath11k_hal_srng_prefetch_desc(struct ath11k_base *ab,
+ struct hal_srng *srng)
+{
+ u32 *desc;
+
+ /* prefetch only if desc is available */
+ desc = ath11k_hal_srng_dst_peek(ab, srng);
+ if (likely(desc)) {
+ dma_sync_single_for_cpu(ab->dev, virt_to_phys(desc),
+ (srng->entry_size * sizeof(u32)),
+ DMA_FROM_DEVICE);
+ prefetch(desc);
+ }
+}
+
u32 *ath11k_hal_srng_dst_get_next_entry(struct ath11k_base *ab,
struct hal_srng *srng)
{
@@ -642,6 +657,10 @@ u32 *ath11k_hal_srng_dst_get_next_entry(struct
ath11k_base *ab,
srng->u.dst_ring.tp = (srng->u.dst_ring.tp + srng->entry_size) %
srng->ring_size;
+ /* Try to prefetch the next descriptor in the ring */
+ if (srng->flags & HAL_SRNG_FLAGS_CACHED)
+ ath11k_hal_srng_prefetch_desc(ab, srng);
+
return desc;
}
@@ -775,11 +794,13 @@ void ath11k_hal_srng_access_begin(struct
ath11k_base
*ab, struct hal_srng *srng)
{
lockdep_assert_held(&srng->lock);
- if (srng->ring_dir == HAL_SRNG_DIR_SRC)
+ if (srng->ring_dir == HAL_SRNG_DIR_SRC) {
srng->u.src_ring.cached_tp =
*(volatile u32 *)srng->u.src_ring.tp_addr;
- else
+ } else {
srng->u.dst_ring.cached_hp = *srng->u.dst_ring.hp_addr;
+ ath11k_hal_srng_prefetch_desc(ab, srng);
+ }
}
/* Update cached ring head/tail pointers to HW.
ath11k_hal_srng_access_begin()
diff --git a/drivers/net/wireless/ath/ath11k/hal.h
b/drivers/net/wireless/ath/ath11k/hal.h
index 35ed3a14e200..0f4f9ce74354 100644
--- a/drivers/net/wireless/ath/ath11k/hal.h
+++ b/drivers/net/wireless/ath/ath11k/hal.h
@@ -513,6 +513,7 @@ enum hal_srng_dir {
#define HAL_SRNG_FLAGS_DATA_TLV_SWAP 0x00000020
#define HAL_SRNG_FLAGS_LOW_THRESH_INTR_EN 0x00010000
#define HAL_SRNG_FLAGS_MSI_INTR 0x00020000
+#define HAL_SRNG_FLAGS_CACHED 0x20000000
#define HAL_SRNG_FLAGS_LMAC_RING 0x80000000
#define HAL_SRNG_TLV_HDR_TAG GENMASK(9, 1)
--
2.25.1
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
Forum,
a Linux Foundation Collaborative Project