On Mon, 19 Aug 2019 18:31:33 +0200, Thomas Bogendoerfer wrote: > Buffers allocated by alloc_skb() are already cache aligned so there > is no need for an extra align done by ioc3_alloc_skb. And instead > of skb_put/skb_trim simply use one skb_put after frame size is known > during receive. > > Signed-off-by: Thomas Bogendoerfer <tbogendoerfer@xxxxxxx> > --- > drivers/net/ethernet/sgi/ioc3-eth.c | 50 ++++++++----------------------------- > 1 file changed, 11 insertions(+), 39 deletions(-) > > diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c > index c875640926d6..d862f28887f9 100644 > --- a/drivers/net/ethernet/sgi/ioc3-eth.c > +++ b/drivers/net/ethernet/sgi/ioc3-eth.c > @@ -11,7 +11,6 @@ > * > * To do: > * > - * o Handle allocation failures in ioc3_alloc_skb() more gracefully. > * o Handle allocation failures in ioc3_init_rings(). > * o Use prefetching for large packets. What is a good lower limit for > * prefetching? > @@ -72,6 +71,12 @@ > #define TX_RING_ENTRIES 128 > #define TX_RING_MASK (TX_RING_ENTRIES - 1) > > +/* BEWARE: The IOC3 documentation documents the size of rx buffers as > + * 1644 while it's actually 1664. This one was nasty to track down... > + */ > +#define RX_OFFSET 10 > +#define RX_BUF_SIZE 1664 > + > #define ETCSR_FD ((17 << ETCSR_IPGR2_SHIFT) | (11 << ETCSR_IPGR1_SHIFT) | 21) > #define ETCSR_HD ((21 << ETCSR_IPGR2_SHIFT) | (21 << ETCSR_IPGR1_SHIFT) | 21) > > @@ -111,31 +116,6 @@ static void ioc3_init(struct net_device *dev); > static const char ioc3_str[] = "IOC3 Ethernet"; > static const struct ethtool_ops ioc3_ethtool_ops; > > -/* We use this to acquire receive skb's that we can DMA directly into. */ > -#define IOC3_CACHELINE 128UL Is the cache line on the platform this driver works on 128B? This looks like a DMA engine alignment requirement, more than an optimization. The comment in __alloc_skb() says: /* We do our best to align skb_shared_info on a separate cache * line. 
It usually works because kmalloc(X > SMP_CACHE_BYTES) gives * aligned memory blocks, unless SLUB/SLAB debug is enabled. * Both skb->head and skb_shared_info are cache line aligned. */ note the "unless". > -static inline unsigned long aligned_rx_skb_addr(unsigned long addr) > -{ > - return (~addr + 1) & (IOC3_CACHELINE - 1UL); > -} > - > -static inline struct sk_buff *ioc3_alloc_skb(unsigned long length, > - unsigned int gfp_mask) > -{ > - struct sk_buff *skb; > - > - skb = alloc_skb(length + IOC3_CACHELINE - 1, gfp_mask); > - if (likely(skb)) { > - int offset = aligned_rx_skb_addr((unsigned long)skb->data); > - > - if (offset) > - skb_reserve(skb, offset); > - } > - > - return skb; > -} > - > static inline unsigned long ioc3_map(void *ptr, unsigned long vdev) > { > #ifdef CONFIG_SGI_IP27 > @@ -148,12 +128,6 @@ static inline unsigned long ioc3_map(void *ptr, unsigned long vdev) > #endif > } > > -/* BEWARE: The IOC3 documentation documents the size of rx buffers as > - * 1644 while it's actually 1664. This one was nasty to track down ... > - */ > -#define RX_OFFSET 10 > -#define RX_BUF_ALLOC_SIZE (1664 + RX_OFFSET + IOC3_CACHELINE) > - > #define IOC3_SIZE 0x100000 > > static inline u32 mcr_pack(u32 pulse, u32 sample) > @@ -534,10 +508,10 @@ static inline void ioc3_rx(struct net_device *dev) > err = be32_to_cpu(rxb->err); /* It's valid ... */ > if (err & ERXBUF_GOODPKT) { > len = ((w0 >> ERXBUF_BYTECNT_SHIFT) & 0x7ff) - 4; > - skb_trim(skb, len); > + skb_put(skb, len); > skb->protocol = eth_type_trans(skb, dev); > > - new_skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC); > + new_skb = alloc_skb(RX_BUF_SIZE, GFP_ATOMIC); > if (!new_skb) { > /* Ouch, drop packet and just recycle packet > * to keep the ring filled. > @@ -546,6 +520,7 @@ static inline void ioc3_rx(struct net_device *dev) > new_skb = skb; > goto next; > } > + new_skb->dev = dev; Assigning dev pointer seems unrelated to the rest of the patch? 
> if (likely(dev->features & NETIF_F_RXCSUM)) > ioc3_tcpudp_checksum(skb, > @@ -556,8 +531,6 @@ static inline void ioc3_rx(struct net_device *dev) > > ip->rx_skbs[rx_entry] = NULL; /* Poison */ > > - /* Because we reserve afterwards. */ > - skb_put(new_skb, (1664 + RX_OFFSET)); > rxb = (struct ioc3_erxbuf *)new_skb->data; > skb_reserve(new_skb, RX_OFFSET); > > @@ -846,16 +819,15 @@ static void ioc3_alloc_rings(struct net_device *dev) > for (i = 0; i < RX_BUFFS; i++) { > struct sk_buff *skb; > > - skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC); > + skb = alloc_skb(RX_BUF_SIZE, GFP_ATOMIC); > if (!skb) { > show_free_areas(0, NULL); > continue; > } > + skb->dev = dev; > > ip->rx_skbs[i] = skb; > > - /* Because we reserve afterwards. */ > - skb_put(skb, (1664 + RX_OFFSET)); > rxb = (struct ioc3_erxbuf *)skb->data; > rxr[i] = cpu_to_be64(ioc3_map(rxb, 1)); > skb_reserve(skb, RX_OFFSET);