I've cleaned up the winbond-840 interrupt handler and added a very simple interrupt avoidance scheme: * if more than 1.333*packet_limit packets arrive per second, then switch off the per-packet rx interrupt, and poll with a 500 us timer. * if the number of packets falls below packet_limit, switch back to rx interrupts. packet_limit is right now 2000. Result: single stream tcp receive performance up from 9.2 MB/sec to 10.5 MB/sec (mtu 1500). In both cases, 0 packet drops (as expected - one tcp stream with a window of 64 kB and a 64-entry rx queue makes packet drops impossible). IMHO that shows that packet drops are not the right signal for deciding when to switch on interrupt mitigation - packets aren't dropped; instead the tcp sender just sends packets more slowly and performance goes down. -- Manfred
--- 2.4/drivers/net/winbond-840.c Tue Oct 2 19:58:10 2001 +++ build-2.4/drivers/net/winbond-840.c Sat Oct 6 10:37:06 2001 @@ -63,7 +63,6 @@ These may be modified when a driver module is loaded.*/ static int debug = 1; /* 1 normal messages, 0 quiet .. 7 verbose. */ -static int max_interrupt_work = 20; /* Maximum number of multicast addresses to filter (vs. Rx-all-multicast). The '840 uses a 64 element hash table based on the Ethernet CRC. */ static int multicast_filter_limit = 32; @@ -72,6 +71,7 @@ Setting to > 1518 effectively disables this feature. */ static int rx_copybreak; +static int packet_limit = 2000; /* Used to pass the media type, etc. Both 'options[]' and 'full_duplex[]' should exist for driver interoperability. @@ -91,7 +91,7 @@ #define TX_RING_SIZE 16 #define TX_QUEUE_LEN 10 /* Limit ring entries actually used. */ #define TX_QUEUE_LEN_RESTART 5 -#define RX_RING_SIZE 32 +#define RX_RING_SIZE 64 #define TX_BUFLIMIT (1024-128) @@ -151,15 +151,15 @@ MODULE_DESCRIPTION("Winbond W89c840 Ethernet driver"); MODULE_LICENSE("GPL"); -MODULE_PARM(max_interrupt_work, "i"); MODULE_PARM(debug, "i"); MODULE_PARM(rx_copybreak, "i"); MODULE_PARM(multicast_filter_limit, "i"); +MODULE_PARM(packet_limit, "i"); MODULE_PARM(options, "1-" __MODULE_STRING(MAX_UNITS) "i"); MODULE_PARM(full_duplex, "1-" __MODULE_STRING(MAX_UNITS) "i"); -MODULE_PARM_DESC(max_interrupt_work, "winbond-840 maximum events handled per interrupt"); MODULE_PARM_DESC(debug, "winbond-840 debug level (0-6)"); MODULE_PARM_DESC(rx_copybreak, "winbond-840 copy breakpoint for copy-only-tiny-frames"); +MODULE_PARM_DESC(packet_limit, "winbond-840 static interrupt mitigation"); MODULE_PARM_DESC(multicast_filter_limit, "winbond-840 maximum number of filtered multicast addresses"); MODULE_PARM_DESC(options, "winbond-840: Bits 0-3: media type, bit 17: full duplex"); MODULE_PARM_DESC(full_duplex, "winbond-840 full duplex setting(s) (1)"); @@ -304,6 +304,8 @@ TxFIFOUnderflow=0x20, RxErrIntr=0x10, TxIdle=0x04, 
IntrTxStopped=0x02, IntrTxDone=0x01, }; +#define INTR_MASK_NORMAL 0x1A0F1 +#define INTR_MASK_MITIGATED 0x1A8B1 /* Bits in the NetworkConfig register. */ enum rx_mode_bits { @@ -338,13 +340,12 @@ DescIntr=0x80000000, }; -#define PRIV_ALIGN 15 /* Required alignment mask */ #define MII_CNT 1 /* winbond only supports one MII */ struct netdev_private { struct w840_rx_desc *rx_ring; dma_addr_t rx_addr[RX_RING_SIZE]; struct w840_tx_desc *tx_ring; - dma_addr_t tx_addr[RX_RING_SIZE]; + dma_addr_t tx_addr[TX_RING_SIZE]; dma_addr_t ring_dma_addr; /* The addresses of receive-in-place skbuffs. */ struct sk_buff* rx_skbuff[RX_RING_SIZE]; @@ -371,6 +372,9 @@ u16 advertising; /* NWay media advertisement */ unsigned char phys[MII_CNT]; /* MII device addresses, but only the first is used */ u32 mii; + int jiffy_packets; + int mitigate; + int jiffies; }; static int eeprom_read(long ioaddr, int location); @@ -970,8 +974,8 @@ update_csr6(dev, 0x00022002 | update_link(dev) | __set_rx_mode(dev)); /* Clear and Enable interrupts by setting the interrupt mask. 
*/ - writel(0x1A0F5, ioaddr + IntrStatus); - writel(0x1A0F5, ioaddr + IntrEnable); + writel(INTR_MASK_NORMAL, ioaddr + IntrStatus); + writel(INTR_MASK_NORMAL, ioaddr + IntrEnable); writel(0, ioaddr + RxStartDemand); } @@ -1064,12 +1068,12 @@ np->tx_ring[entry].buffer1 = np->tx_addr[entry]; if (skb->len < TX_BUFLIMIT) { - np->tx_ring[entry].length = DescWholePkt | skb->len; + np->tx_ring[entry].length = DescIntr | DescWholePkt | skb->len; } else { int len = skb->len - TX_BUFLIMIT; np->tx_ring[entry].buffer2 = np->tx_addr[entry]+TX_BUFLIMIT; - np->tx_ring[entry].length = DescWholePkt | (len << 11) | TX_BUFLIMIT; + np->tx_ring[entry].length = DescIntr | DescWholePkt | (len << 11) | TX_BUFLIMIT; } if(entry == TX_RING_SIZE-1) np->tx_ring[entry].length |= DescEndRing; @@ -1097,7 +1101,6 @@ if (np->cur_tx - np->dirty_tx > TX_QUEUE_LEN || ((np->drv_flags & HasBrokenTx) && np->tx_q_bytes > TX_BUG_FIFO_LIMIT)) { netif_stop_queue(dev); - wmb(); np->tx_full = 1; } spin_unlock_irq(&np->lock); @@ -1162,11 +1165,36 @@ np->tx_q_bytes < TX_BUG_FIFO_LIMIT) { /* The ring is no longer full, clear tbusy. 
*/ np->tx_full = 0; - wmb(); netif_wake_queue(dev); } } +void check_mitigation(struct net_device *dev) +{ + struct netdev_private *np = dev->priv; + int packets = np->jiffy_packets; + long ioaddr = dev->base_addr; + + if (!netif_device_present(dev)) + goto out; /* needed against suspend races */ + np->jiffy_packets = 0; + if(packets > (packet_limit*4)/(3*HZ) && !np->mitigate) { + /* switch on mitigation */ + writel(INTR_MASK_MITIGATED, ioaddr + IntrEnable); + np->mitigate = 1; + if (debug > 2) + printk(KERN_DEBUG "%s: enabling mitigation.\n", dev->name); + } else if (packets < packet_limit/HZ && np->mitigate) { + /* switch off mitigation */ + writel(INTR_MASK_NORMAL, ioaddr + IntrEnable); + np->mitigate = 0; + if (debug > 2) + printk(KERN_DEBUG "%s: disabling mitigation.\n", dev->name); + } +out: + np->jiffies = jiffies; +} + /* The interrupt handler does all of the Rx thread work and cleans up after the Tx thread. */ static void intr_handler(int irq, void *dev_instance, struct pt_regs *rgs) @@ -1174,55 +1202,47 @@ struct net_device *dev = (struct net_device *)dev_instance; struct netdev_private *np = dev->priv; long ioaddr = dev->base_addr; - int work_limit = max_interrupt_work; + u32 intr_status; if (!netif_device_present(dev)) return; - do { - u32 intr_status = readl(ioaddr + IntrStatus); - - /* Acknowledge all of the current interrupt sources ASAP. 
*/ - writel(intr_status & 0x001ffff, ioaddr + IntrStatus); - - if (debug > 4) - printk(KERN_DEBUG "%s: Interrupt, status %4.4x.\n", + if (np->jiffies != jiffies) + check_mitigation(dev); + intr_status = readl(ioaddr + IntrStatus); + if (debug > 4) + printk(KERN_DEBUG "%s: Interrupt, status %4.4x.\n", dev->name, intr_status); - if ((intr_status & (NormalIntr|AbnormalIntr)) == 0) - break; + if (!(intr_status & (NormalIntr|AbnormalIntr|IntrRxDone))) + goto out; - if (intr_status & (IntrRxDone | RxNoBuf)) - netdev_rx(dev); - if (intr_status & RxNoBuf) - writel(0, ioaddr + RxStartDemand); - - if (intr_status & (TxIdle | IntrTxDone) && - np->cur_tx != np->dirty_tx) { - spin_lock(&np->lock); - netdev_tx_done(dev); - spin_unlock(&np->lock); - } + /* Acknowledge all of the current interrupt sources ASAP. */ + writel(intr_status & 0x001ffff, ioaddr + IntrStatus); - /* Abnormal error summary/uncommon events handlers. */ - if (intr_status & (AbnormalIntr | TxFIFOUnderflow | IntrPCIErr | - TimerInt | IntrTxStopped)) - netdev_error(dev, intr_status); - - if (--work_limit < 0) { - printk(KERN_WARNING "%s: Too much work at interrupt, " - "status=0x%4.4x.\n", dev->name, intr_status); - /* Set the timer to re-enable the other interrupts after - 10*82usec ticks. */ - spin_lock(&np->lock); - if (netif_device_present(dev)) { - writel(AbnormalIntr | TimerInt, ioaddr + IntrEnable); - writel(10, ioaddr + GPTimer); - } - spin_unlock(&np->lock); - break; - } - } while (1); + /* and reload to make certain it has left the writebuffer */ + readl(ioaddr + IntrStatus); + if (intr_status & (IntrRxDone | RxNoBuf)) + netdev_rx(dev); + if (intr_status & RxNoBuf) + writel(0, ioaddr + RxStartDemand); + + if (intr_status & (TxIdle | IntrTxDone) && + np->cur_tx != np->dirty_tx) { + spin_lock(&np->lock); + netdev_tx_done(dev); + spin_unlock(&np->lock); + } + /* Abnormal error summary/uncommon events handlers. 
*/ + if (intr_status & (TxFIFOUnderflow | IntrPCIErr | IntrRxDied | + IntrTxStopped)) + netdev_error(dev, intr_status); +out: + if (np->mitigate) { + /* set 0x10000 (periodic) to guarantee that the timer arrives + * it seems that otherwise the timer is sometimes lost */ + writel(0x10000|7, ioaddr + GPTimer); + } if (debug > 3) printk(KERN_DEBUG "%s: exiting interrupt, status=%#4.4x.\n", dev->name, (int)readl(ioaddr + IntrStatus)); @@ -1255,7 +1275,8 @@ if ((status & 0x38000300) != 0x0300) { /* Ingore earlier buffers. */ if ((status & 0xffff) != 0x7fff) { - printk(KERN_WARNING "%s: Oversized Ethernet frame spanned " + if (debug > 1) + printk(KERN_INFO "%s: Oversized Ethernet frame spanned " "multiple buffers, entry %#x status %4.4x!\n", dev->name, np->cur_rx, status); np->stats.rx_length_errors++; @@ -1322,6 +1343,7 @@ dev->last_rx = jiffies; np->stats.rx_packets++; np->stats.rx_bytes += pkt_len; + np->jiffy_packets++; } entry = (++np->cur_rx) % RX_RING_SIZE; np->rx_head_desc = &np->rx_ring[entry]; @@ -1382,11 +1404,6 @@ } if (intr_status & IntrRxDied) { /* Missed a Rx frame. */ np->stats.rx_errors++; - } - if (intr_status & TimerInt) { - /* Re-enable other interrupts. */ - if (netif_device_present(dev)) - writel(0x1A0F5, ioaddr + IntrEnable); } np->stats.rx_missed_errors += readl(ioaddr + RxMissed) & 0xffff; writel(0, ioaddr + RxStartDemand);