Howdy, I was hired to port a heavy-traffic Linux network device, which uses 100Mbps tulip devices, from kernel 2.2 to kernel 2.4. The original code used a modified tulip driver (attached patch), based on v0.91g-ppc 7/16/99 that comes with Linux "vanilla" 2.2.16. This driver uses a timer-driven polling mechanism (no interrupts at all) for receiving and transmitting packets, instead of the interrupt-driven mechanism in the original driver. The target is to get, with kernel 2.4, at least the same performance as achieved with the 2.2 kernel. As the results show, it is not trivial. The relevant setup includes only one device, which both receives and sends traffic. Results (in percentage of "line rate"): Kernel 2.2.16 with tulip_polling_2.2.16.patch: 54%; Kernel 2.4.22 "vanilla": 10%; Kernel 2.4.22 with NAPI-1.2.0 [2002-07-06 J Hadi Salim]: 26%; Kernel 2.4.22 with tulip v0.97 7/22/2003 Donald Becker: 34%; Kernel 2.4.22 with tulip v0.97a (polling patch ported from 2.2.16): 15%; Kernel 2.6.0-test11 "vanilla": 31%. The original 2.4 tulip driver, as well as the NAPI driver, have not produced the required performance. A check of the 2.6 kernel with the original NAPI tulip driver seems to suggest that this is not a limitation of the NAPIified tulip ported to 2.4, as it behaves the same on 2.6. The v0.97 Becker version, which supports interrupt mitigation on its own, seems to give the best results with kernel 2.4, but still less than the 2.2 patched driver attached here. Note that all of this is true only when the same NIC does both Rx and Tx. When two different NICs do Rx and Tx, both the NAPI drivers and Becker's v0.97 achieve almost the same results as the polling driver. 
However, porting the polling mechanism from the patched v0.91g-ppc to v0.97 (patch attached) did not seem to work very well with regard to performance, as the results show, which most likely means I've missed something and messed up the port :-) Any insights as to how to get any version of the driver (Becker, NAPI or fixes to my port) to produce the same results on 2.4 as are available on 2.2 will be very much appreciated. Extra points if you explain what's the dumb thing I did when trying to port the polling version to 2.4... ;-) Hardware setup: Pentium III, DECchip 21142/43 (rev 65). Testing setup: - Shomiti (network traffic generator) is connected to eth2 on a Linux station. - All packets sent to the Linux station and all packets received from it are recorded. - Small valid UDP packets (64 bytes long) with src 192.168.2.100 and dst 192.168.3.100 are sent to the Linux station. - A properly forwarded packet is received back from the Linux station. - Linux configuration: ifconfig eth2 192.168.2.2 up ifconfig eth2:1 192.168.3.3 up arp -s 192.168.2.100 22:22:22:22:22:22 arp -s 192.168.3.100 33:33:33:33:33:33 Compiler and linker: gcc-2.95.3 and binutils-2.9.1.0.25 as recommended for kernel 2.4. Line rate terms explanation: the 54% line rate achieved with the 2.2 kernel is 81,000 packets/sec of 64-byte packets, in each direction, with no loss. Description of the attached patches: tulip_polling_2.2.16.patch - well tested polling capability for the tulip driver of "vanilla" kernel 2.2.16 [v0.91g-ppc]. tulip_polling_0.97.patch - experimental adaptation of the 2.2.16 polling patch to v0.97 of Donald Becker's tulip driver. Many thanks! Gilad. -- Gilad Ben-Yossef <gilad@codefidence.com> Codefidence. A name you can trust (tm) http://www.codefidence.com
--- tulip-v0.97.c 2003-12-11 16:08:03.000000000 -0500 +++ tulip-v0.97a-polling.c 2003-12-11 16:08:03.000000000 -0500 @@ -24,10 +24,14 @@ /* These identify the driver base version and may not be removed. */ static const char version1[] = -"tulip.c:v0.97 7/22/2003 Written by Donald Becker <becker@scyld.com>\n"; +"tulip.c:v0.97a 7/22/2003 Written by Donald Becker <becker@scyld.com>" +" polling version 12/7/2003 by Shahar Livne <shahar@codefidence.com>\n"; static const char version2[] = " http://www.scyld.com/network/tulip.html\n"; +#define POLLIT +#define SKB_RECYCLED 256 + #define SMP_CHECK /* The user-configurable values. @@ -35,8 +39,14 @@ static int debug = 2; /* Message enable: 0..31 = no..all messages. */ +#ifndef POLLIT /* Maximum events (Rx packets, etc.) to handle at each interrupt. */ static int max_interrupt_work = 25; +#endif //POLLIT + +#ifndef RUN_AT +#define RUN_AT(x) (jiffies + (x)) +#endif #define MAX_UNITS 8 /* Used to pass the full-duplex flag, etc. */ @@ -66,7 +76,11 @@ #ifdef __alpha__ /* Always copy to aligned IP headers. */ static int rx_copybreak = 1518; #else +#ifdef POLLIT +static int rx_copybreak = 0; +#else static int rx_copybreak = 100; +#endif //POLLIT #endif /* @@ -110,9 +124,16 @@ These values have been carefully studied: changing these might mask a problem, it won't fix it. */ +//FIXME:check best TX_QUEUE_LEN for POLLIT +#ifdef POLLIT +#define TX_RING_SIZE 256 +#define TX_QUEUE_LEN 10 +#define RX_RING_SIZE 256 +#else #define TX_RING_SIZE 16 #define TX_QUEUE_LEN 10 #define RX_RING_SIZE 32 +#endif //POLLIT /* Operational parameters that usually are not changed. */ /* Time in jiffies before concluding the transmitter is hung. */ @@ -166,6 +187,7 @@ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> +#include <linux/ip.h> /* FIXME: is it necessary? */ #include <asm/processor.h> /* Processor type for cache alignment. 
*/ #include <asm/bitops.h> #include <asm/io.h> @@ -609,6 +631,7 @@ }; #define PRIV_ALIGN 15 /* Required alignment mask */ + struct tulip_private { struct tulip_rx_desc rx_ring[RX_RING_SIZE]; struct tulip_tx_desc tx_ring[TX_RING_SIZE]; @@ -624,6 +647,9 @@ struct pci_dev *pci_dev; int chip_id, revision; int flags; +#ifdef POLLIT + int poll_it; +#endif //POLLIT int max_interrupt_work; int msg_level; unsigned int csr0, csr6; /* Current CSR0, CSR6 settings. */ @@ -676,7 +702,9 @@ static void tulip_tx_timeout(struct net_device *dev); static void tulip_init_ring(struct net_device *dev); +#ifndef POLLIT static int tulip_start_xmit(struct sk_buff *skb, struct net_device *dev); +#endif //POLLIT static int tulip_rx(struct net_device *dev); static void tulip_interrupt(int irq, void *dev_instance, struct pt_regs *regs); static int tulip_close(struct net_device *dev); @@ -686,11 +714,104 @@ #endif static void set_rx_mode(struct net_device *dev); +#ifdef POLLIT +static int tulip_tx_clean(struct net_device *dev); +static int tulip_tx_queue(struct sk_buff *skb, struct net_device *dev); +#define ONE ((void *) 1) +#endif //POLLIT + /* A list of all installed Tulip devices. 
*/ static struct net_device *root_tulip_dev = NULL; +#ifdef POLLIT +static struct timer_list poll_timer; /* Poll receive interrupts */ +static int poll_count; /* should really use atomic increment and decrement */ +static void add_poll_timer(struct net_device *dev); + +/* Poll for received packets */ +static void tulip_poll_timer(unsigned long data) +{ + struct net_device *dev = (struct net_device *)root_tulip_dev; + + while (dev) { + struct tulip_private *sp = (struct tulip_private *)(dev->priv); + + if (sp->poll_it) + tulip_interrupt(dev->irq, (void *)dev, (struct pt_regs *)ONE); + dev= sp->next_module; + } + poll_timer.expires = RUN_AT(1); /* The next tick */ + add_timer(&poll_timer); +} + +/* Schedule polling for receiver inputs */ +static void add_poll_timer(struct net_device *dev) +{ + static int once_only = 0; + + if (once_only++ == 0) { + init_timer(&poll_timer); + poll_timer.data = 0; + poll_timer.function = &tulip_poll_timer; /* timer handler */ + } + poll_timer.expires = RUN_AT(1); /* The next tick */ + add_timer(&poll_timer); +} + +#ifdef SKB_RECYCLED + +unsigned tulip_recycled_skb_size = 0; +static int tulip_recycled_skb_cnt = 0; +static unsigned long tulip_recycle_skb_lock = 0; +static struct sk_buff *tulip_recycled_skbs[SKB_RECYCLED]; + +/* WARNING: these functions are not reentrant! */ +static inline struct sk_buff *tulip_get_recycled_skb(void) +{ + struct sk_buff* skb = 0; + if (tulip_recycled_skb_cnt > 0) { + while (test_and_set_bit(0, (void*)&tulip_recycle_skb_lock)) { + while(tulip_recycle_skb_lock) + asm volatile("" ::: "memory"); + } + if (tulip_recycled_skb_cnt > 0) { + tulip_recycled_skb_cnt--; + skb = tulip_recycled_skbs[tulip_recycled_skb_cnt]; + } + clear_bit(0, (void*)&tulip_recycle_skb_lock); + } + return skb; +} + +/* tries to recycle an skb. 
if not successful, the skb passed in is freed */ +static inline void tulip_recycle_or_free_skb(struct sk_buff *skb) +{ + if (skb->truesize == tulip_recycled_skb_size) { + if (tulip_recycled_skb_cnt < SKB_RECYCLED) { + while (test_and_set_bit(0, (void*)&tulip_recycle_skb_lock)) { + while(tulip_recycle_skb_lock) + asm volatile("" ::: "memory"); + } + if (tulip_recycled_skb_cnt < SKB_RECYCLED) { + if (skb_recycle(skb)) { + tulip_recycled_skbs[tulip_recycled_skb_cnt] = skb; + tulip_recycled_skb_cnt++; + } + skb = 0; + } + clear_bit(0, (void*)&tulip_recycle_skb_lock); + } + } + if (skb != 0) + dev_kfree_skb(skb); +} +#endif //SKB_RECYCLED +#endif //POLLIT + +//FIXME3 + static void *tulip_probe1(struct pci_dev *pdev, void *init_dev, long ioaddr, int irq, int pci_tbl_idx, int find_cnt) { @@ -735,6 +856,11 @@ pci_read_config_byte(pdev, PCI_REVISION_ID, &chip_rev); +#ifdef POLLIT + /* Turn off receive and transmit interrupts */ + tulip_tbl[chip_idx].valid_intrs &= ~(TxIntr | TxNoBuf | RxIntr | RxNoBuf); +#endif //POLLIT + printk(KERN_INFO "%s: %s rev %d at %#3lx,", dev->name, pci_id_tbl[pci_tbl_idx].name, chip_rev, ioaddr); @@ -851,7 +977,9 @@ tp->flags = tulip_tbl[chip_idx].flags | (pci_id_tbl[pci_tbl_idx].drv_flags & 0xffffff00); tp->rx_copybreak = rx_copybreak; +#ifndef POLLIT tp->max_interrupt_work = max_interrupt_work; +#endif //POLLIT tp->multicast_filter_limit = multicast_filter_limit; tp->csr0 = csr0; @@ -909,7 +1037,11 @@ /* The Tulip-specific entries in the device structure. */ dev->open = &tulip_open; +#ifdef POLLIT + dev->hard_start_xmit = &tulip_tx_queue; +#else dev->hard_start_xmit = &tulip_start_xmit; +#endif //POLLIT dev->stop = &tulip_close; dev->get_stats = &tulip_get_stats; #ifdef HAVE_PRIVATE_IOCTL @@ -989,6 +1121,8 @@ (tp->full_duplex ? 0x0100 : 0x0000) | ((media_cap[tp->default_port] & MediaIs100) ? 
0x2000 : 0x1000)); + + } } tp->mii_cnt = phy_idx; @@ -1579,6 +1713,17 @@ outl(tp->csr6 | TxOn | RxOn, ioaddr + CSR6); outl(0, ioaddr + CSR2); /* Rx poll demand */ +#ifdef POLLIT + tp->poll_it = 1; + + if (poll_count++ == 0) /* start up one and only one timer */ + add_poll_timer(dev); + + /* something is fishy if poll_count get too high */ + if (poll_count > 2) + printk("poll_count > 2(%d)\n", poll_count); +#endif //POLLIT + if (tp->msg_level & NETIF_MSG_IFUP) printk(KERN_DEBUG "%s: Done tulip_open(), CSR0 %8.8x, CSR5 %8.8x CSR6 " "%8.8x.\n", dev->name, (int)inl(ioaddr + CSR0), @@ -2688,6 +2833,13 @@ dev_alloc_skb() provides 16 byte alignment. But do *not* use skb_reserve() to align the IP header! */ struct sk_buff *skb = dev_alloc_skb(tp->rx_buf_sz); +#ifdef POLLIT +#ifdef SKB_RECYCLED + /* Save the size we need for these rings */ + if (tulip_recycled_skb_size == 0) + tulip_recycled_skb_size = skb->truesize; +#endif //SKB_RECYCLED +#endif //POLLIT tp->rx_skbuff[i] = skb; if (skb == NULL) break; @@ -2707,6 +2859,7 @@ tp->tx_ring[i-1].buffer2 = virt_to_le32desc(&tp->tx_ring[0]); } +#ifndef POLLIT static int tulip_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -2766,6 +2919,7 @@ return 0; } +#endif //POLLIT /* The interrupt handler does all of the Rx thread work and cleans up after the Tx thread. 
*/ @@ -2778,18 +2932,37 @@ do { csr5 = inl(ioaddr + CSR5); +#ifdef POLLIT + /* if we are polling, set the interrupt bits just once, regs != ONE for real interrupt */ + if (regs == ONE) { + /* fake the interrupt flags */ + csr5 |= NormalIntr | RxIntr | TxIntr; + } else { + if ((csr5 & (NormalIntr|AbnormalIntr)) == 0) + break; + outl(csr5 & 0x0001ffff, ioaddr + CSR5); + } +#endif //POLLIT + /* leave if we're not really in interrupt (or poll) */ if ((csr5 & (NormalIntr|AbnormalIntr)) == 0) break; if (tp->msg_level & NETIF_MSG_INTR) printk(KERN_DEBUG "%s: interrupt csr5=%#8.8x new csr5=%#8.8x.\n", dev->name, csr5, (int)inl(dev->base_addr + CSR5)); +#ifndef POLLIT /* Acknowledge all of the current interrupt sources ASAP. */ outl(csr5 & 0x0001ffff, ioaddr + CSR5); +#endif //POLLIT (not) if (csr5 & (RxIntr | RxNoBuf)) work_budget -= tulip_rx(dev); - +#ifdef POLLIT + if (regs == ONE) { + tulip_tx_clean(dev); + break; + } +#else if (csr5 & (TxNoBuf | TxDied | TxIntr)) { unsigned int dirty_tx; @@ -2864,6 +3037,7 @@ tp->rx_dead = 0; } } +#endif //POLLIT /* Log errors. */ if (csr5 & AbnormalIntr) { /* Abnormal error summary bit. 
*/ @@ -2947,6 +3121,13 @@ return; } + +#ifdef POLLIT +/* Don't process in one tick more than these many packets */ +#define MAX_RX_PER_TICK 250000/HZ +#endif //POLLIT + + static int tulip_rx(struct net_device *dev) { struct tulip_private *tp = (struct tulip_private *)dev->priv; @@ -2954,6 +3135,11 @@ int rx_work_limit = tp->dirty_rx + RX_RING_SIZE - tp->cur_rx; int work_done = 0; +#ifdef POLLIT + if (rx_work_limit > MAX_RX_PER_TICK) + rx_work_limit= MAX_RX_PER_TICK; +#endif //POLLIT + if (tp->msg_level & NETIF_MSG_RX_STATUS) printk(KERN_DEBUG " In tulip_rx(), entry %d %8.8x.\n", entry, tp->rx_ring[entry].status); @@ -3033,6 +3219,20 @@ for (; tp->cur_rx - tp->dirty_rx > 0; tp->dirty_rx++) { entry = tp->dirty_rx % RX_RING_SIZE; if (tp->rx_skbuff[entry] == NULL) { +#ifdef POLLIT +#ifdef SKB_RECYCLED + struct sk_buff *skb = tulip_get_recycled_skb(); +#else //SKB_RECYUCLED + struct sk_buff *skb = NULL; +#endif //SKB_RECYCLED + if (skb) + tp->rx_skbuff[entry] = skb; + else { + skb = tp->rx_skbuff[entry] = dev_alloc_skb(PKT_BUF_SZ); + if (skb == NULL) + break; + } +#else //POLLIT struct sk_buff *skb; skb = tp->rx_skbuff[entry] = dev_alloc_skb(tp->rx_buf_sz); if (skb == NULL) { @@ -3041,6 +3241,7 @@ "receive buffers.\n", dev->name); break; } +#endif //POLLIT skb->dev = dev; /* Mark as being used by this device. */ tp->rx_ring[entry].buffer1 = virt_to_le32desc(skb->tail); work_done++; @@ -3051,6 +3252,7 @@ return work_done; } + static void empty_rings(struct net_device *dev) { struct tulip_private *tp = (struct tulip_private *)dev->priv; @@ -3096,6 +3298,13 @@ if (tp->chip_id == DC21040) outl(0x00000004, ioaddr + CSR13); +#ifdef POLLIT + tp->poll_it = 0; + + if (--poll_count == 0) + del_timer(&poll_timer); +#endif //POLLIT + if (inl(ioaddr + CSR6) != 0xffffffff) tp->stats.rx_missed_errors += inl(ioaddr + CSR8) & 0xffff; @@ -3672,6 +3881,115 @@ reverse_probe = 0; /* Not used. 
*/ } #endif /* MODULE */ + + +#ifdef POLLIT +static int tulip_tx_queue(struct sk_buff *skb, struct net_device *dev) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + int entry; + u32 flag; + + if (netif_pause_tx_queue(dev) != 0) { + tp->stats.tx_dropped++; + return 1; + } + + /* Caution: the write order is important here, set the base address + * with the "ownership" bits last. */ + + /* Calculate the next Tx descriptor entry. */ + entry = tp->cur_tx % TX_RING_SIZE; + + tp->tx_skbuff[entry] = skb; + tp->tx_ring[entry].buffer1 = virt_to_le32desc(skb->data); + + flag = 0x60000000; /* No interrupt */ + + if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE - 2) + tp->tx_full = 0; + else + /* Leave room for set_rx_mode() to fill entries. */ + tp->tx_full = 1; + + if (entry == TX_RING_SIZE-1) + flag = 0xe0000000 | DESC_RING_WRAP; + + tp->tx_ring[entry].length = cpu_to_le32(skb->len | flag); + + /* Pass ownership to the chip. */ + tp->tx_ring[entry].status = cpu_to_le32(DescOwned); + tp->cur_tx++; + +#ifndef DEMAND_POLLTX + outl(0, dev->base_addr + CSR1); + dev->trans_start = jiffies; +#endif + if (!tp->tx_full) + //clear_bit(0, (void*)&dev->tbusy); + netif_unpause_tx_queue(dev); + return 0; +} + +/* clean up tx dma ring */ +static int tulip_tx_clean(struct net_device *dev) +{ + struct tulip_private *tp; + unsigned int dirty_tx; + int ret; + tp = (struct tulip_private *)dev->priv; + + for (dirty_tx = tp->dirty_tx; tp->cur_tx - dirty_tx > 0; dirty_tx++) { + int entry = dirty_tx % TX_RING_SIZE; + int status = le32_to_cpu(tp->tx_ring[entry].status); +#ifdef PREFETCH + int next_entry = (dirty_tx+1) % TX_RING_SIZE; + volatile int next_status; + next_status = tp->tx_ring[next_entry].status; +#endif + if (status < 0) break; /* It still hasn't been Txed */ + + /* Check for Rx filter setup frames. */ + if (tp->tx_skbuff[entry] == NULL) + continue; + + if (status & 0x8000) { + /* There was an major error, log it. 
*/ + tp->stats.tx_errors++; + if (status & 0x4104) tp->stats.tx_aborted_errors++; + if (status & 0x0C00) tp->stats.tx_carrier_errors++; + if (status & 0x0200) tp->stats.tx_window_errors++; + if (status & 0x0002) tp->stats.tx_fifo_errors++; + if ((status & 0x0080) && tp->full_duplex == 0) + tp->stats.tx_heartbeat_errors++; + } else { + tp->stats.tx_bytes += tp->tx_ring[entry].length & 0x7ff; + tp->stats.collisions += (status >> 3) & 15; + tp->stats.tx_packets++; + } +#ifdef SKB_RECYCLE + tulip_recycle_or_free_skb(tp->tx_skbuff[entry]); +#else + if (tp->tx_skbuff[entry] != 0) + dev_kfree_skb(tp->tx_skbuff[entry]); + else printk(KERN_INFO "Try to free NULL skbuff.\n"); +#endif + tp->tx_skbuff[entry] = 0; + } + + if (tp->tx_full && tp->cur_tx-dirty_tx < TX_RING_SIZE-2) { + /* The ring is no longer full, resume queue. */ + tp->tx_full = 0; + netif_resume_tx_queue(dev); + } + + tp->dirty_tx = dirty_tx; + ret = tp->cur_tx - tp->dirty_tx; + + return ret; +} +#endif //POLLIT + /* * Local variables:
--- /usr/src/linux-2.2.16/drivers/net/tulip.c 2000-05-03 20:16:43.000000000 -0400 +++ tulip-2.2.16-polling.c 2003-12-11 09:46:47.000000000 -0500 @@ -21,13 +21,17 @@ bug reports. */ +#define POLLIT + #define SMP_CHECK static const char version[] = "tulip.c:v0.91g-ppc 7/16/99 becker@cesdis.gsfc.nasa.gov\n"; /* A few user-configurable values. */ +#ifndef POLLIT /* Maximum events (Rx packets, etc.) to handle at each interrupt. */ static int max_interrupt_work = 25; +#endif POLLIT #define MAX_UNITS 8 /* Used to pass the full-duplex flag, etc. */ @@ -54,14 +58,25 @@ Making the Tx ring too large decreases the effectiveness of channel bonding and packet priority. There are no ill effects from too-large receive rings. */ +#ifdef POLLIT +#define TX_RING_SIZE 256 +#define RX_RING_SIZE 256 +#else #define TX_RING_SIZE 16 #define RX_RING_SIZE 32 +#endif POLLIT /* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. */ #ifdef __alpha__ static int rx_copybreak = 1518; #else + +#ifdef POLLIT +static int rx_copybreak = 0; +#else static int rx_copybreak = 100; +#endif POLLIT + #endif /* @@ -508,6 +523,9 @@ int chip_id; int revision; int flags; +#ifdef POLLIT + int poll_it; +#endif POLLIT struct net_device_stats stats; struct timer_list timer; /* Media selection timer. */ int interrupt; /* In-interrupt flag. 
*/ @@ -554,7 +572,9 @@ static void tulip_tx_timeout(struct device *dev); static void tulip_init_ring(struct device *dev); +#ifndef POLLIT static int tulip_start_xmit(struct sk_buff *skb, struct device *dev); +#endif POLLIT static int tulip_refill_rx(struct device *dev); static int tulip_rx(struct device *dev); static void tulip_interrupt(int irq, void *dev_instance, struct pt_regs *regs); @@ -565,11 +585,54 @@ #endif static void set_rx_mode(struct device *dev); +#ifdef POLLIT +static int tulip_tx_clean(struct device *dev); +static int tulip_tx_queue(struct sk_buff *skb, struct device *dev); +#define ONE ((void *) 1) +#endif POLLIT + /* A list of all installed Tulip devices. */ static struct device *root_tulip_dev = NULL; +#ifdef POLLIT +static struct timer_list poll_timer; /* Poll receive interrupts */ +static int poll_count; /* should really use atomic increment and decrement */ +static void add_poll_timer(struct device *dev); + +/* Poll for received packets */ +static void tulip_poll_timer(unsigned long data) +{ + struct device *dev = (struct device *)root_tulip_dev; + + while (dev) { + struct tulip_private *sp = (struct tulip_private *)dev->priv; + + if (sp->poll_it) + tulip_interrupt(dev->irq, (void *)dev, (struct pt_regs *)ONE); + dev= sp->next_module; + } + poll_timer.expires = RUN_AT(1); /* The next tick */ + add_timer(&poll_timer); +} + +/* Schedule polling for receiver inputs */ +static void add_poll_timer(struct device *dev) +{ + static int once_only = 0; + + if (once_only++ == 0) { + init_timer(&poll_timer); + poll_timer.data = 0; + poll_timer.function = &tulip_poll_timer; /* timer handler */ + } + poll_timer.expires = RUN_AT(1); /* The next tick */ + add_timer(&poll_timer); +} +#endif POLLIT + + #ifndef CARDBUS int tulip_probe(struct device *dev) { @@ -709,6 +772,11 @@ if (tulip_tbl[chip_idx].flags & HAS_PWRDWN) pcibios_write_config_dword(pci_bus, pci_devfn, 0x40, 0x00000000); +#ifdef POLLIT + /* Turn off receive and transmit interrupts */ + 
tulip_tbl[chip_idx].valid_intrs &= ~(TxIntr | TxNoBuf | RxIntr | RxNoBuf); +#endif POLLIT + printk(KERN_INFO "%s: %s rev %d at %#3lx,", dev->name, tulip_tbl[chip_idx].chip_name, chip_rev, ioaddr); @@ -926,7 +994,11 @@ /* The Tulip-specific entries in the device structure. */ dev->open = &tulip_open; +#ifdef POLLIT + dev->hard_start_xmit = &tulip_tx_queue; +#else dev->hard_start_xmit = &tulip_start_xmit; +#endif POLLIT dev->stop = &tulip_close; dev->get_stats = &tulip_get_stats; #ifdef HAVE_PRIVATE_IOCTL @@ -1591,6 +1663,17 @@ outl(tp->csr6 | 0x2002, ioaddr + CSR6); outl(0, ioaddr + CSR2); /* Rx poll demand */ +#ifdef POLLIT + tp->poll_it = 1; + + if (poll_count++ == 0) /* start up one and only one timer */ + add_poll_timer(dev); + + /* something is fishy if poll_count get too high */ + if (poll_count > 2) + printk("poll_count > 2(%d)\n", poll_count); +#endif POLLIT + if (tulip_debug > 2) { printk(KERN_DEBUG "%s: Done tulip_open(), CSR0 %8.8x, CSR5 %8.8x CSR6 %8.8x.\n", dev->name, inl(ioaddr + CSR0), inl(ioaddr + CSR5), @@ -2537,6 +2620,7 @@ tp->tx_ring[i-1].buffer2 = virt_to_le32desc(&tp->tx_ring[0]); } +#ifndef POLLIT static int tulip_start_xmit(struct sk_buff *skb, struct device *dev) { @@ -2587,6 +2671,7 @@ return 0; } +#endif POLLIT /* The interrupt handler does all of the Rx thread work and cleans up after the Tx thread. 
*/ @@ -2607,10 +2692,18 @@ #if defined(__i386__) && defined(SMP_CHECK) if (test_and_set_bit(0, (void*)&dev->interrupt)) { +#ifdef POLLIT + /* We must acknowledge interrupts, otherwise death by infinite interrupt + loop, Must think of something to keep from losing important interrupt + information.*/ + csr5 = inl(ioaddr + CSR5); + outl(csr5 & 0x0001ffff, ioaddr + CSR5); +#else POLLIT printk(KERN_ERR "%s: Duplicate entry of the interrupt handler by " "processor %d.\n", dev->name, hard_smp_processor_id()); dev->interrupt = 0; +#endif POLLIT return; } #else @@ -2625,8 +2718,17 @@ do { csr5 = inl(ioaddr + CSR5); +#ifdef POLLIT + /* if we are polling, set the interrupt bits just once, regs != ONE for real interrupt */ + if (regs == ONE) { + csr5 |= NormalIntr | RxIntr | TxIntr; + } else { + outl(csr5 & 0x0001ffff, ioaddr + CSR5); + } +#else POLLIT /* Acknowledge all of the current interrupt sources ASAP. */ outl(csr5 & 0x0001ffff, ioaddr + CSR5); +#endif POLLIT if (tulip_debug > 4) printk(KERN_DEBUG "%s: interrupt csr5=%#8.8x new csr5=%#8.8x.\n", @@ -2640,6 +2742,12 @@ tulip_refill_rx(dev); } +#ifdef POLLIT + if (regs == ONE){ + tulip_tx_clean(dev); + break; + } +#else if (csr5 & (TxNoBuf | TxDied | TxIntr | TimerInt)) { unsigned int dirty_tx; @@ -2714,6 +2822,7 @@ outl(tp->csr6 | 0x2002, ioaddr + CSR6); } } +#endif POLLIT /* Log errors. */ if (csr5 & AbnormalIntr) { /* Abnormal error summary bit. */ @@ -2772,7 +2881,9 @@ } } while (1); +#ifndef POLLIT tulip_refill_rx(dev); +#endif POLLIT /* check if we card is in suspend mode */ entry = tp->dirty_rx % RX_RING_SIZE; @@ -2806,6 +2917,34 @@ return; } +#ifdef POLLIT +static int tulip_refill_rx(struct device *dev) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + int entry; + int refilled = 0; + + /* Refill the Rx ring buffers. 
*/ + for (; tp->cur_rx - tp->dirty_rx > 0; tp->dirty_rx++) { + entry = tp->dirty_rx % RX_RING_SIZE; + if (tp->rx_skbuff[entry] == NULL) { + struct sk_buff *skb = NULL; + if (skb) + tp->rx_skbuff[entry] = skb; + else { + skb = tp->rx_skbuff[entry] = dev_alloc_skb(PKT_BUF_SZ); + if (skb == NULL) + break; + } + skb->dev = dev; /* Mark as being used by this device. */ + tp->rx_ring[entry].buffer1 = virt_to_le32desc(skb->tail); + refilled++; + } + tp->rx_ring[entry].status = cpu_to_le32(DescOwned); + } + return refilled; +} +#else static int tulip_refill_rx(struct device *dev) { struct tulip_private *tp = (struct tulip_private *)dev->priv; @@ -2828,6 +2967,12 @@ } return refilled; } +#endif POLLIT + +#ifdef POLLIT +/* Don't process in one tick more than these many packets */ +#define MAX_RX_PER_TICK 250000/HZ +#endif POLLIT static int tulip_rx(struct device *dev) { @@ -2836,9 +2981,15 @@ int rx_work_limit = tp->dirty_rx + RX_RING_SIZE - tp->cur_rx; int received = 0; +#ifdef POLLIT + if (rx_work_limit > MAX_RX_PER_TICK) + rx_work_limit= MAX_RX_PER_TICK; +#endif POLLIT + if (tulip_debug > 4) printk(KERN_DEBUG " In tulip_rx(), entry %d %8.8x.\n", entry, tp->rx_ring[entry].status); + /* If we own the next entry, it is a new packet. Send it up. */ while ( ! 
(tp->rx_ring[entry].status & cpu_to_le32(DescOwned))) { s32 status = le32_to_cpu(tp->rx_ring[entry].status); @@ -2943,6 +3094,13 @@ if (tp->chip_id == DC21040) outl(0x00000004, ioaddr + CSR13); +#ifdef POLLIT + tp->poll_it = 0; + + if (--poll_count == 0) + del_timer(&poll_timer); +#endif POLLIT + if (inl(ioaddr + CSR6) != 0xffffffff) tp->stats.rx_missed_errors += inl(ioaddr + CSR8) & 0xffff; @@ -3345,6 +3503,111 @@ } #endif /* MODULE */ + +#ifdef POLLIT +static int tulip_tx_queue(struct sk_buff *skb, struct device *dev) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + int entry; + u32 flag; + + if (test_and_set_bit(0, (void*)&dev->tbusy) != 0) { +#ifdef UNDEF /* removed by tmeged */ + printk("tulip_tx_queue: reject because tbusy\n"); +#endif UNDEF + tp->stats.tx_dropped++; + return 1; + } + + /* Caution: the write order is important here, set the base address + * with the "ownership" bits last. */ + + /* Calculate the next Tx descriptor entry. */ + entry = tp->cur_tx % TX_RING_SIZE; + + tp->tx_skbuff[entry] = skb; + tp->tx_ring[entry].buffer1 = virt_to_le32desc(skb->data); + + flag = 0x60000000; /* No interrupt */ + + if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE - 2) + tp->tx_full = 0; + else + /* Leave room for set_rx_mode() to fill entries. */ + tp->tx_full = 1; + + if (entry == TX_RING_SIZE-1) + flag = 0xe0000000 | DESC_RING_WRAP; + + tp->tx_ring[entry].length = cpu_to_le32(skb->len | flag); + + /* Pass ownership to the chip. 
*/ + tp->tx_ring[entry].status = cpu_to_le32(DescOwned); + tp->cur_tx++; + +#ifndef DEMAND_POLLTX + outl(0, dev->base_addr + CSR1); + dev->trans_start = jiffies; +#endif + if (!tp->tx_full) + clear_bit(0, (void*)&dev->tbusy); + + return 0; +} + +/* clean up tx dma ring */ +static int tulip_tx_clean(struct device *dev) +{ + struct tulip_private *tp; + unsigned int dirty_tx; + int ret; + tp = (struct tulip_private *)dev->priv; + + for (dirty_tx = tp->dirty_tx; tp->cur_tx - dirty_tx > 0; dirty_tx++) { + int entry = dirty_tx % TX_RING_SIZE; + int status = le32_to_cpu(tp->tx_ring[entry].status); +#ifdef PREFETCH + int next_entry = (dirty_tx+1) % TX_RING_SIZE; + volatile int next_status; + next_status = tp->tx_ring[next_entry].status; +#endif + if (status < 0) break; /* It still hasn't been Txed */ + + /* Check for Rx filter setup frames. */ + if (tp->tx_skbuff[entry] == NULL) + continue; + + if (status & 0x8000) { + /* There was an major error, log it. */ + tp->stats.tx_errors++; + if (status & 0x4104) tp->stats.tx_aborted_errors++; + if (status & 0x0C00) tp->stats.tx_carrier_errors++; + if (status & 0x0200) tp->stats.tx_window_errors++; + if (status & 0x0002) tp->stats.tx_fifo_errors++; + if ((status & 0x0080) && tp->full_duplex == 0) + tp->stats.tx_heartbeat_errors++; + } else { + tp->stats.tx_bytes += tp->tx_ring[entry].length & 0x7ff; + tp->stats.collisions += (status >> 3) & 15; + tp->stats.tx_packets++; + } + dev_kfree_skb(tp->tx_skbuff[entry]); + tp->tx_skbuff[entry] = 0; + } + + if (tp->tx_full && dev->tbusy && tp->cur_tx-dirty_tx < TX_RING_SIZE-2) { + /* The ring is no longer full, clear tbusy. */ + tp->tx_full = 0; + dev->tbusy = 0; + } + + tp->dirty_tx = dirty_tx; + ret = tp->cur_tx - tp->dirty_tx; + + return ret; +} +#endif POLLIT + /* * Local variables: