I've cleaned up the winbond-840 interrupt handler and added a very simple interrupt avoidance scheme: * if more than 1.333*packet_limit packets arrive per second, then switch off the per-packet rx interrupt, and poll with a 500 us timer. * if the number of packets falls below packet_limit, switch back to rx interrupts. packet_limit is right now 2000. Result: single stream tcp receive performance up from 9.2 MB/sec to 10.5 MB/sec (mtu 1500). In both cases, 0 packet drops (as expected - one tcp stream with a window of 64 kB and a 64-entry rx queue makes packet drops impossible). IMHO that shows that packet drops are not the right signal for deciding when to switch on interrupt mitigation - packets aren't dropped; instead the tcp sender just sends packets more slowly and performance goes down. -- Manfred
--- 2.4/drivers/net/winbond-840.c Tue Oct 2 19:58:10 2001 +++ build-2.4/drivers/net/winbond-840.c Sat Oct 6 10:37:06 2001 @@ -63,7 +63,6 @@ These may be modified when a driver module is loaded.*/ static int debug = 1; /* 1 normal messages, 0 quiet .. 7 verbose. */ -static int max_interrupt_work = 20; /* Maximum number of multicast addresses to filter (vs. Rx-all-multicast). The '840 uses a 64 element hash table based on the Ethernet CRC. */ static int multicast_filter_limit = 32; @@ -72,6 +71,7 @@ Setting to > 1518 effectively disables this feature. */ static int rx_copybreak; +static int packet_limit = 2000; /* Used to pass the media type, etc. Both 'options[]' and 'full_duplex[]' should exist for driver interoperability. @@ -91,7 +91,7 @@ #define TX_RING_SIZE 16 #define TX_QUEUE_LEN 10 /* Limit ring entries actually used. */ #define TX_QUEUE_LEN_RESTART 5 -#define RX_RING_SIZE 32 +#define RX_RING_SIZE 64 #define TX_BUFLIMIT (1024-128) @@ -151,15 +151,15 @@ MODULE_DESCRIPTION("Winbond W89c840 Ethernet driver"); MODULE_LICENSE("GPL"); -MODULE_PARM(max_interrupt_work, "i"); MODULE_PARM(debug, "i"); MODULE_PARM(rx_copybreak, "i"); MODULE_PARM(multicast_filter_limit, "i"); +MODULE_PARM(packet_limit, "i"); MODULE_PARM(options, "1-" __MODULE_STRING(MAX_UNITS) "i"); MODULE_PARM(full_duplex, "1-" __MODULE_STRING(MAX_UNITS) "i"); -MODULE_PARM_DESC(max_interrupt_work, "winbond-840 maximum events handled per interrupt"); MODULE_PARM_DESC(debug, "winbond-840 debug level (0-6)"); MODULE_PARM_DESC(rx_copybreak, "winbond-840 copy breakpoint for copy-only-tiny-frames"); +MODULE_PARM_DESC(packet_limit, "winbond-840 static interrupt mitigation"); MODULE_PARM_DESC(multicast_filter_limit, "winbond-840 maximum number of filtered multicast addresses"); MODULE_PARM_DESC(options, "winbond-840: Bits 0-3: media type, bit 17: full duplex"); MODULE_PARM_DESC(full_duplex, "winbond-840 full duplex setting(s) (1)"); @@ -304,6 +304,8 @@ TxFIFOUnderflow=0x20, RxErrIntr=0x10, TxIdle=0x04, 
IntrTxStopped=0x02, IntrTxDone=0x01, }; +#define INTR_MASK_NORMAL 0x1A0F1 +#define INTR_MASK_MITIGATED 0x1A8B1 /* Bits in the NetworkConfig register. */ enum rx_mode_bits { @@ -338,13 +340,12 @@ DescIntr=0x80000000, }; -#define PRIV_ALIGN 15 /* Required alignment mask */ #define MII_CNT 1 /* winbond only supports one MII */ struct netdev_private { struct w840_rx_desc *rx_ring; dma_addr_t rx_addr[RX_RING_SIZE]; struct w840_tx_desc *tx_ring; - dma_addr_t tx_addr[RX_RING_SIZE]; + dma_addr_t tx_addr[TX_RING_SIZE]; dma_addr_t ring_dma_addr; /* The addresses of receive-in-place skbuffs. */ struct sk_buff* rx_skbuff[RX_RING_SIZE]; @@ -371,6 +372,9 @@ u16 advertising; /* NWay media advertisement */ unsigned char phys[MII_CNT]; /* MII device addresses, but only the first is used */ u32 mii; + int jiffy_packets; + int mitigate; + int jiffies; }; static int eeprom_read(long ioaddr, int location); @@ -970,8 +974,8 @@ update_csr6(dev, 0x00022002 | update_link(dev) | __set_rx_mode(dev)); /* Clear and Enable interrupts by setting the interrupt mask. 
*/ - writel(0x1A0F5, ioaddr + IntrStatus); - writel(0x1A0F5, ioaddr + IntrEnable); + writel(INTR_MASK_NORMAL, ioaddr + IntrStatus); + writel(INTR_MASK_NORMAL, ioaddr + IntrEnable); writel(0, ioaddr + RxStartDemand); } @@ -1064,12 +1068,12 @@ np->tx_ring[entry].buffer1 = np->tx_addr[entry]; if (skb->len < TX_BUFLIMIT) { - np->tx_ring[entry].length = DescWholePkt | skb->len; + np->tx_ring[entry].length = DescIntr | DescWholePkt | skb->len; } else { int len = skb->len - TX_BUFLIMIT; np->tx_ring[entry].buffer2 = np->tx_addr[entry]+TX_BUFLIMIT; - np->tx_ring[entry].length = DescWholePkt | (len << 11) | TX_BUFLIMIT; + np->tx_ring[entry].length = DescIntr | DescWholePkt | (len << 11) | TX_BUFLIMIT; } if(entry == TX_RING_SIZE-1) np->tx_ring[entry].length |= DescEndRing; @@ -1097,7 +1101,6 @@ if (np->cur_tx - np->dirty_tx > TX_QUEUE_LEN || ((np->drv_flags & HasBrokenTx) && np->tx_q_bytes > TX_BUG_FIFO_LIMIT)) { netif_stop_queue(dev); - wmb(); np->tx_full = 1; } spin_unlock_irq(&np->lock); @@ -1162,11 +1165,36 @@ np->tx_q_bytes < TX_BUG_FIFO_LIMIT) { /* The ring is no longer full, clear tbusy. 
*/ np->tx_full = 0; - wmb(); netif_wake_queue(dev); } } +void check_mitigation(struct net_device *dev) +{ + struct netdev_private *np = dev->priv; + int packets = np->jiffy_packets; + long ioaddr = dev->base_addr; + + if (!netif_device_present(dev)) + goto out; /* needed against suspend races */ + np->jiffy_packets = 0; + if(packets > (packet_limit*4)/(3*HZ) && !np->mitigate) { + /* switch on mitigation */ + writel(INTR_MASK_MITIGATED, ioaddr + IntrEnable); + np->mitigate = 1; + if (debug > 2) + printk(KERN_DEBUG "%s: enabling mitigation.\n", dev->name); + } else if (packets < packet_limit/HZ && np->mitigate) { + /* switch off mitigation */ + writel(INTR_MASK_NORMAL, ioaddr + IntrEnable); + np->mitigate = 0; + if (debug > 2) + printk(KERN_DEBUG "%s: disabling mitigation.\n", dev->name); + } +out: + np->jiffies = jiffies; +} + /* The interrupt handler does all of the Rx thread work and cleans up after the Tx thread. */ static void intr_handler(int irq, void *dev_instance, struct pt_regs *rgs) @@ -1174,55 +1202,47 @@ struct net_device *dev = (struct net_device *)dev_instance; struct netdev_private *np = dev->priv; long ioaddr = dev->base_addr; - int work_limit = max_interrupt_work; + u32 intr_status; if (!netif_device_present(dev)) return; - do { - u32 intr_status = readl(ioaddr + IntrStatus); - - /* Acknowledge all of the current interrupt sources ASAP. 
*/ - writel(intr_status & 0x001ffff, ioaddr + IntrStatus); - - if (debug > 4) - printk(KERN_DEBUG "%s: Interrupt, status %4.4x.\n", + if (np->jiffies != jiffies) + check_mitigation(dev); + intr_status = readl(ioaddr + IntrStatus); + if (debug > 4) + printk(KERN_DEBUG "%s: Interrupt, status %4.4x.\n", dev->name, intr_status); - if ((intr_status & (NormalIntr|AbnormalIntr)) == 0) - break; + if (!(intr_status & (NormalIntr|AbnormalIntr|IntrRxDone))) + goto out; - if (intr_status & (IntrRxDone | RxNoBuf)) - netdev_rx(dev); - if (intr_status & RxNoBuf) - writel(0, ioaddr + RxStartDemand); - - if (intr_status & (TxIdle | IntrTxDone) && - np->cur_tx != np->dirty_tx) { - spin_lock(&np->lock); - netdev_tx_done(dev); - spin_unlock(&np->lock); - } + /* Acknowledge all of the current interrupt sources ASAP. */ + writel(intr_status & 0x001ffff, ioaddr + IntrStatus); - /* Abnormal error summary/uncommon events handlers. */ - if (intr_status & (AbnormalIntr | TxFIFOUnderflow | IntrPCIErr | - TimerInt | IntrTxStopped)) - netdev_error(dev, intr_status); - - if (--work_limit < 0) { - printk(KERN_WARNING "%s: Too much work at interrupt, " - "status=0x%4.4x.\n", dev->name, intr_status); - /* Set the timer to re-enable the other interrupts after - 10*82usec ticks. */ - spin_lock(&np->lock); - if (netif_device_present(dev)) { - writel(AbnormalIntr | TimerInt, ioaddr + IntrEnable); - writel(10, ioaddr + GPTimer); - } - spin_unlock(&np->lock); - break; - } - } while (1); + /* and reload to make certain it has left the writebuffer */ + readl(ioaddr + IntrStatus); + if (intr_status & (IntrRxDone | RxNoBuf)) + netdev_rx(dev); + if (intr_status & RxNoBuf) + writel(0, ioaddr + RxStartDemand); + + if (intr_status & (TxIdle | IntrTxDone) && + np->cur_tx != np->dirty_tx) { + spin_lock(&np->lock); + netdev_tx_done(dev); + spin_unlock(&np->lock); + } + /* Abnormal error summary/uncommon events handlers. 
*/ + if (intr_status & (TxFIFOUnderflow | IntrPCIErr | IntrRxDied | + IntrTxStopped)) + netdev_error(dev, intr_status); +out: + if (np->mitigate) { + /* set 0x10000 (periodic) to guarantee that the timer arrives + * it seems that otherwise the timer is sometimes lost */ + writel(0x10000|7, ioaddr + GPTimer); + } if (debug > 3) printk(KERN_DEBUG "%s: exiting interrupt, status=%#4.4x.\n", dev->name, (int)readl(ioaddr + IntrStatus)); @@ -1255,7 +1275,8 @@ if ((status & 0x38000300) != 0x0300) { /* Ingore earlier buffers. */ if ((status & 0xffff) != 0x7fff) { - printk(KERN_WARNING "%s: Oversized Ethernet frame spanned " + if (debug > 1) + printk(KERN_INFO "%s: Oversized Ethernet frame spanned " "multiple buffers, entry %#x status %4.4x!\n", dev->name, np->cur_rx, status); np->stats.rx_length_errors++; @@ -1322,6 +1343,7 @@ dev->last_rx = jiffies; np->stats.rx_packets++; np->stats.rx_bytes += pkt_len; + np->jiffy_packets++; } entry = (++np->cur_rx) % RX_RING_SIZE; np->rx_head_desc = &np->rx_ring[entry]; @@ -1382,11 +1404,6 @@ } if (intr_status & IntrRxDied) { /* Missed a Rx frame. */ np->stats.rx_errors++; - } - if (intr_status & TimerInt) { - /* Re-enable other interrupts. */ - if (netif_device_present(dev)) - writel(0x1A0F5, ioaddr + IntrEnable); } np->stats.rx_missed_errors += readl(ioaddr + RxMissed) & 0xffff; writel(0, ioaddr + RxStartDemand);