Hi, Vineeth, On 30.01.2024 12:48, Vineeth Karumanchi wrote: > When GEM is used as a wake device, it is not mandatory for the RX DMA > to be active. The RX engine in IP only needs to receive and identify > a wake packet through an interrupt. The wake packet is of no further > significance; hence, it is not required to be copied into memory. > By disabling RX DMA during suspend, we can avoid unnecessary DMA > processing of any incoming traffic. > > During suspend, perform either of the below operations: > > tie-off/dummy descriptor: Disable unused queues by connecting > them to a looped descriptor chain without free slots. > > queue disable: The newer IP version allows disabling individual queues. I would add a dash line for each of these 2 items for better understanding. E.g.: - tie-off/dummy descriptior: ... - queue disable: ... > > Co-developed-by: Harini Katakam <harini.katakam@xxxxxxx> > Signed-off-by: Harini Katakam <harini.katakam@xxxxxxx> > Signed-off-by: Vineeth Karumanchi <vineeth.karumanchi@xxxxxxx> > --- > drivers/net/ethernet/cadence/macb.h | 7 +++ > drivers/net/ethernet/cadence/macb_main.c | 58 +++++++++++++++++++++++- > 2 files changed, 64 insertions(+), 1 deletion(-) > > diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h > index aa5700ac9c00..f68ff163aa18 100644 > --- a/drivers/net/ethernet/cadence/macb.h > +++ b/drivers/net/ethernet/cadence/macb.h > @@ -645,6 +645,9 @@ > #define GEM_T2OFST_OFFSET 0 /* offset value */ > #define GEM_T2OFST_SIZE 7 > > +/* Bitfields in queue pointer registers */ > +#define GEM_RBQP_DISABLE BIT(0) You have spaces after macro name. However the approach in this driver is to define bit offset and lenght and use {MACB,GEM}_BIT() macros (see the above bitfield definitions). > + > /* Offset for screener type 2 compare values (T2CMPOFST). > * Note the offset is applied after the specified point, > * e.g. GEM_T2COMPOFST_ETYPE denotes the EtherType field, so an offset > @@ -737,6 +740,7 @@ > #define MACB_CAPS_HIGH_SPEED 0x02000000 > #define MACB_CAPS_CLK_HW_CHG 0x04000000 > #define MACB_CAPS_MACB_IS_EMAC 0x08000000 > +#define MACB_CAPS_QUEUE_DISABLE 0x00002000 Can you keep this sorted by value with the rest of the caps? > #define MACB_CAPS_FIFO_MODE 0x10000000 > #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 > #define MACB_CAPS_SG_DISABLED 0x40000000 > @@ -1254,6 +1258,7 @@ struct macb { > u32 (*macb_reg_readl)(struct macb *bp, int offset); > void (*macb_reg_writel)(struct macb *bp, int offset, u32 value); > > + struct macb_dma_desc *rx_ring_tieoff; Keep this ^ > size_t rx_buffer_size; > > unsigned int rx_ring_size; > @@ -1276,6 +1281,8 @@ struct macb { > struct gem_stats gem; > } hw_stats; > > + dma_addr_t rx_ring_tieoff_dma; And this ^ toghether. > + > struct macb_or_gem_ops macbgem_ops; > > struct mii_bus *mii_bus; > diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c > index 898debfd4db3..47cbea58e6c3 100644 > --- a/drivers/net/ethernet/cadence/macb_main.c > +++ b/drivers/net/ethernet/cadence/macb_main.c > @@ -2479,6 +2479,12 @@ static void macb_free_consistent(struct macb *bp) > > bp->macbgem_ops.mog_free_rx_buffers(bp); > > + if (bp->rx_ring_tieoff) { > + dma_free_coherent(&bp->pdev->dev, macb_dma_desc_get_size(bp), > + bp->rx_ring_tieoff, bp->rx_ring_tieoff_dma); > + bp->rx_ring_tieoff = NULL; > + } > + Keep the reverse order of operation as oposed to macb_alloc_consistent, thus move this before bp->macbgem_ops.mog_free_rx_buffers(); > for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { > kfree(queue->tx_skb); > queue->tx_skb = NULL; > @@ -2568,6 +2574,16 @@ static int macb_alloc_consistent(struct macb *bp) > if (bp->macbgem_ops.mog_alloc_rx_buffers(bp)) > goto out_err; > > + /* Required for tie off descriptor for PM cases */ > + if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE)) { > + bp->rx_ring_tieoff = dma_alloc_coherent(&bp->pdev->dev, > + macb_dma_desc_get_size(bp), > + &bp->rx_ring_tieoff_dma, > + GFP_KERNEL); > + if (!bp->rx_ring_tieoff) > + goto out_err; You also need to free the previously allocated rx buffers. > + } > + > return 0; > > out_err: > @@ -2575,6 +2591,16 @@ static int macb_alloc_consistent(struct macb *bp) > return -ENOMEM; > } > > +static void macb_init_tieoff(struct macb *bp) > +{ > + struct macb_dma_desc *d = bp->rx_ring_tieoff; s/d/desc to cope with the rest of descriptor usage in this file. Add this here: if (bp->caps & MACB_CAPS_QUEUE_DISABLE) return; to avoid checking it in different places where this function is called. > + /* Setup a wrapping descriptor with no free slots > + * (WRAP and USED) to tie off/disable unused RX queues. > + */ > + macb_set_addr(bp, d, MACB_BIT(RX_WRAP) | MACB_BIT(RX_USED)); > + d->ctrl = 0; > +} > + > static void gem_init_rings(struct macb *bp) > { > struct macb_queue *queue; > @@ -2598,6 +2624,8 @@ static void gem_init_rings(struct macb *bp) > gem_rx_refill(queue); > } > > + if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE)) > + macb_init_tieoff(bp); > } > > static void macb_init_rings(struct macb *bp) > @@ -2615,6 +2643,8 @@ static void macb_init_rings(struct macb *bp) > bp->queues[0].tx_head = 0; > bp->queues[0].tx_tail = 0; > desc->ctrl |= MACB_BIT(TX_WRAP); > + > + macb_init_tieoff(bp); > } > > static void macb_reset_hw(struct macb *bp) > @@ -4917,7 +4947,8 @@ static const struct macb_config sama7g5_emac_config = { > > static const struct macb_config versal_config = { > .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO | > - MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH | MACB_CAPS_NEED_TSUCLK, > + MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH | > + MACB_CAPS_QUEUE_DISABLE | MACB_CAPS_NEED_TSUCLK, This should go in a different patch. > .dma_burst_length = 16, > .clk_init = macb_clk_init, > .init = init_reset_optional, > @@ -5214,6 +5245,7 @@ static int __maybe_unused macb_suspend(struct device *dev) > struct macb_queue *queue; > unsigned long flags; > unsigned int q; > + u32 ctrlmask; val/tmp should be enough for this as you are, anyway, re-using it in the next patch for different purpose. > int err; > > if (!device_may_wakeup(&bp->dev->dev)) > @@ -5224,6 +5256,30 @@ static int __maybe_unused macb_suspend(struct device *dev) > > if (bp->wol & MACB_WOL_ENABLED) { > spin_lock_irqsave(&bp->lock, flags); > + > + /* Disable Tx and Rx engines before disabling the queues, > + * this is mandatory as per the IP spec sheet > + */ > + ctrlmask = macb_readl(bp, NCR); > + ctrlmask &= ~(MACB_BIT(TE) | MACB_BIT(RE)); You can remove this > + macb_writel(bp, NCR, ctrlmask); And add this: macb_writel(bp, NCR, ctrlmask & ~(MACB_BIT(TE) | MACB_BIT(RE)); > + for (q = 0, queue = bp->queues; q < bp->num_queues; > + ++q, ++queue) { > + /* Disable RX queues */ Operation in this for loop could be moved in the the above IRQ disable loop. Have you tried it? are there any issues with it? > + if (bp->caps & MACB_CAPS_QUEUE_DISABLE) { > + queue_writel(queue, RBQP, GEM_RBQP_DISABLE); > + } else { > + /* Tie off RX queues */ > + queue_writel(queue, RBQP, > + lower_32_bits(bp->rx_ring_tieoff_dma)); I think this should be guarded by: #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT > + queue_writel(queue, RBQPH, > + upper_32_bits(bp->rx_ring_tieoff_dma)); > + } > + } > + /* Enable Receive engine */ > + ctrlmask = macb_readl(bp, NCR); Is this needed? > + ctrlmask |= MACB_BIT(RE); > + macb_writel(bp, NCR, ctrlmask); These could be merged > /* Flush all status bits */ > macb_writel(bp, TSR, -1); > macb_writel(bp, RSR, -1);