On Wed, 2022-12-21 at 19:02 +0100, Lorenzo Bianconi wrote:
> On Dec 21, Felix Fietkau wrote:
> > Hi Sujuan,
> >
> > > Yes, it is so expensive, but without the memcpy it will cause
> > > memory fragmentation (we hit this issue in internal SQC).
> > >
> > > As we know, WED needs to exchange rx pkts (belonging to the WED
> > > rx buffer manager) with the wifi driver (dma rx data queue) by
> > > exchanging wfdma dmads to ensure free WED rx buffers.
> > >
> > > It is possible that a large number of buffers have been exchanged
> > > to WED and cannot come back to the wlan driver. So the memory
> > > from the same 32K page cache cannot be released, and
> > > page_frag_alloc in mt76_dma_rx_fill will fail.
> > >
> > > Any ideas other than memcpy?
> >
> > A simple solution would be to simply allocate single pages, or
> > half-page fragments.
> >
> > - Felix
> >
>
> A simple approach would be allocating a single page for each rx
> buffer.
>
> @Sujuan: can you please double check if it is ok from a performance
> and memory fragmentation point of view? If not, I guess we can try
> to optimize it and allocate multiple buffers in the same page,
> tweaking the page refcount.
>
> (this patch must be applied on top of Felix's dma fix).

Allocating a single page for each rx buffer avoids memory
fragmentation, but it always uses 4K for one rx pkt that only needs
2K, right?

I guess performance would be worse without the page cache. On the MTK
private driver we measured a 7% throughput drop with a 4K page cache
compared to the 32K page cache, and a 10% drop when allocating the
buffers from the slab.

So a single page per rx buffer may cost more than 7% of throughput
and waste memory. What do you think?

Regards,
Sujuan

> Regards,
> Lorenzo
>
> diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
> index 28a7fe064313..1d9e580977fc 100644
> --- a/drivers/net/wireless/mediatek/mt76/dma.c
> +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> @@ -580,6 +580,20 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
>  	return ret;
>  }
>  
> +static void *
> +mt76_dma_get_rx_buf(struct mt76_queue *q)
> +{
> +	if ((q->flags & MT_QFLAG_WED) &&
> +	    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
> +		/* WED RX queue */
> +		struct page *page = dev_alloc_page();
> +
> +		return page ? page_address(page) : NULL;
> +	}
> +
> +	return page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
> +}
> +
>  static int
>  mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
>  {
> @@ -596,7 +610,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
>  		struct mt76_queue_buf qbuf;
>  		void *buf = NULL;
>  
> -		buf = page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
> +		buf = mt76_dma_get_rx_buf(q);
>  		if (!buf)
>  			break;
>  
> diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> index 1a2e4df8d1b5..2924e71e4fbe 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> @@ -594,13 +594,9 @@ static void mt7915_mmio_wed_offload_disable(struct mtk_wed_device *wed)
>  static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device *wed)
>  {
>  	struct mt7915_dev *dev;
> -	u32 length;
>  	int i;
>  
>  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> -				sizeof(struct skb_shared_info));
> -
>  	for (i = 0; i < dev->mt76.rx_token_size; i++) {
>  		struct mt76_txwi_cache *t;
>  
> @@ -610,7 +606,7 @@ static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device *wed)
>  
>  		dma_unmap_single(dev->mt76.dma_dev, t->dma_addr,
>  				 wed->wlan.rx_size, DMA_FROM_DEVICE);
> -		__free_pages(virt_to_page(t->ptr), get_order(length));
> +		__free_page(virt_to_page(t->ptr));
>  		t->ptr = NULL;
>  
>  		mt76_put_rxwi(&dev->mt76, t);
> @@ -621,13 +617,9 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
>  {
>  	struct mtk_rxbm_desc *desc = wed->rx_buf_ring.desc;
>  	struct mt7915_dev *dev;
> -	u32 length;
>  	int i;
>  
>  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> -				sizeof(struct skb_shared_info));
> -
>  	for (i = 0; i < size; i++) {
>  		struct mt76_txwi_cache *t = mt76_get_rxwi(&dev->mt76);
>  		dma_addr_t phy_addr;
> @@ -635,7 +627,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
>  		int token;
>  		void *ptr;
>  
> -		page = __dev_alloc_pages(GFP_KERNEL, get_order(length));
> +		page = __dev_alloc_page(GFP_KERNEL);
>  		if (!page)
>  			goto unmap;
>  
> @@ -644,7 +636,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
>  					    wed->wlan.rx_size,
>  					    DMA_TO_DEVICE);
>  		if (unlikely(dma_mapping_error(dev->mt76.dev, phy_addr))) {
> -			__free_pages(page, get_order(length));
> +			__free_page(page);
>  			goto unmap;
>  		}
>  
> @@ -653,7 +645,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
>  		if (token < 0) {
>  			dma_unmap_single(dev->mt76.dma_dev, phy_addr,
>  					 wed->wlan.rx_size, DMA_TO_DEVICE);
> -			__free_pages(page, get_order(length));
> +			__free_page(page);
>  			goto unmap;
>  		}
>
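For reference, the "multiple buffers in the same page, tweaking the
page refcount" idea Lorenzo floats above could look roughly like the
sketch below. This is only a sketch under assumptions, not existing
mt76 code: mt76_dma_get_wed_rx_buf() is a hypothetical helper,
q->rx_page_cache / q->rx_page_off would be new fields in struct
mt76_queue, and MT_WED_RX_BUF_SIZE stands in for the 2K rx buffer
size, so one 4K page serves two buffers. Each handed-out buffer holds
its own page reference, so WED and the wlan driver can release their
halves independently.

    static void *
    mt76_dma_get_wed_rx_buf(struct mt76_queue *q)
    {
    	/* rx_page_cache/rx_page_off and MT_WED_RX_BUF_SIZE are
    	 * assumed additions, not existing mt76 definitions.
    	 */
    	struct page *page = q->rx_page_cache;
    	void *buf;

    	if (!page) {
    		page = dev_alloc_page();
    		if (!page)
    			return NULL;
    		q->rx_page_cache = page;
    		q->rx_page_off = 0;
    	}

    	/* take one page reference per buffer, so each buffer can be
    	 * freed on its own with put_page()
    	 */
    	get_page(page);
    	buf = page_address(page) + q->rx_page_off;

    	q->rx_page_off += MT_WED_RX_BUF_SIZE;
    	if (q->rx_page_off + MT_WED_RX_BUF_SIZE > PAGE_SIZE) {
    		/* page fully carved up: drop the cache's own reference */
    		q->rx_page_cache = NULL;
    		put_page(page);
    	}

    	return buf;
    }

With such a scheme the release paths would have to use
put_page(virt_to_page(t->ptr)) instead of __free_page(), since the
other half of the page may still be in flight on the WED side, and a
queue-teardown path would need to drop any reference still held via
q->rx_page_cache. Whether the halved memory use wins back the ~7%
throughput that the larger page cache provides is exactly the kind of
retest Sujuan describes above.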