Richard Procter and I worked to remove cli/sti to add proper SMP support (I did the original stuff and Richard did the actual current code :)).
Besides that, Richard did a great jog improving the perfomance of the driver quite a bit:
- Improve mc32_command by 770% (438% non-inlined) over the semaphore version (at a cost of 1 sem + 2 completions per driver).
- Removed mutex covering mc32_send_packet (hard_start_xmit). This lets the interrupt handler operate concurrently and removes unnecessary locking. It makes the code only slightly more brittle
Original post:
http://marc.theaimsgroup.com/?l=linux-netdev&m=106438449315202&w=2
Since it didn't apply cleanly against 2.6.0-test6, I forward ported it. Patch attached.
Jeff, please consider applying,
Thanks.
Felipe
--- linux-2.6.0-test6/drivers/net/3c527.c.orig 2003-10-06 15:21:38.000000000 -0300 +++ linux-2.6.0-test6/drivers/net/3c527.c 2003-10-06 16:18:32.000000000 -0300 @@ -1,9 +1,10 @@ -/* 3c527.c: 3Com Etherlink/MC32 driver for Linux 2.4 +/* 3c527.c: 3Com Etherlink/MC32 driver for Linux 2.4 and 2.6. * * (c) Copyright 1998 Red Hat Software Inc * Written by Alan Cox. * Further debugging by Carl Drougge. - * Modified by Richard Procter (rnp@netlink.co.nz) + * Initial SMP support by Felipe W Damasio <felipewd@terra.com.br> + * Heavily modified by Richard Procter <rnp@paradise.net.nz> * * Based on skeleton.c written 1993-94 by Donald Becker and ne2.c * (for the MCA stuff) written by Wim Dumon. @@ -17,11 +18,11 @@ */ #define DRV_NAME "3c527" -#define DRV_VERSION "0.6a" -#define DRV_RELDATE "2001/11/17" +#define DRV_VERSION "0.7-SMP" +#define DRV_RELDATE "2003/10/06" static const char *version = -DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE " Richard Proctor (rnp@netlink.co.nz)\n"; +DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE " Richard Procter <rnp@paradise.net.nz>\n"; /** * DOC: Traps for the unwary @@ -100,7 +101,9 @@ #include <linux/string.h> #include <linux/wait.h> #include <linux/ethtool.h> +#include <linux/completion.h> +#include <asm/semaphore.h> #include <asm/uaccess.h> #include <asm/system.h> #include <asm/bitops.h> @@ -152,8 +155,10 @@ /* Information that needs to be kept for each board. */ struct mc32_local { - struct net_device_stats net_stats; int slot; + + u32 base; + struct net_device_stats net_stats; volatile struct mc32_mailbox *rx_box; volatile struct mc32_mailbox *tx_box; volatile struct mc32_mailbox *exec_box; @@ -163,22 +168,22 @@ u16 tx_len; /* Transmit list count */ u16 rx_len; /* Receive list count */ - u32 base; - u16 exec_pending; - u16 mc_reload_wait; /* a multicast load request is pending */ + u16 xceiver_desired_state; /* HALTED or RUNNING */ + u16 cmd_nonblocking; /* Thread is uninterested in command result */ + u16 mc_reload_wait; /* A multicast load request is pending */ u32 mc_list_valid; /* True when the mclist is set */ - u16 xceiver_state; /* Current transceiver state. bitmapped */ - u16 desired_state; /* The state we want the transceiver to be in */ - atomic_t tx_count; /* buffers left */ - wait_queue_head_t event; struct mc32_ring_desc tx_ring[TX_RING_LEN]; /* Host Transmit ring */ struct mc32_ring_desc rx_ring[RX_RING_LEN]; /* Host Receive ring */ - u16 tx_ring_tail; /* index to tx de-queue end */ - u16 tx_ring_head; /* index to tx en-queue end */ - + atomic_t tx_count; /* buffers left */ + volatile u16 tx_ring_head; /* index to tx en-queue end */ + u16 tx_ring_tail; /* index to tx de-queue end */ u16 rx_ring_tail; /* index to rx de-queue end */ + + struct semaphore cmd_mutex; /* Serialises issuing of execute commands */ + struct completion execution_cmd; /* Card has completed an execute command */ + struct completion xceiver_cmd; /* Card has completed a tx or rx command */ }; /* The station (ethernet) address prefix, used for a sanity check. */ @@ -234,7 +239,6 @@ { static int current_mca_slot = -1; int i; - int adapter_found = 0; SET_MODULE_OWNER(dev); @@ -245,11 +249,11 @@ Autodetecting MCA cards is extremely simple. Just search for the card. */ - for(i = 0; (mc32_adapters[i].name != NULL) && !adapter_found; i++) { + for(i = 0; (mc32_adapters[i].name != NULL); i++) { current_mca_slot = mca_find_unused_adapter(mc32_adapters[i].id, 0); - if((current_mca_slot != MCA_NOTFOUND) && !adapter_found) { + if(current_mca_slot != MCA_NOTFOUND) { if(!mc32_probe1(dev, current_mca_slot)) { mca_set_adapter_name(current_mca_slot, @@ -407,7 +411,7 @@ * Grab the IRQ */ - i = request_irq(dev->irq, &mc32_interrupt, SA_SHIRQ, dev->name, dev); + i = request_irq(dev->irq, &mc32_interrupt, SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev); if (i) { release_region(dev->base_addr, MC32_IO_EXTENT); printk(KERN_ERR "%s: unable to get IRQ %d.\n", dev->name, dev->irq); @@ -496,7 +500,9 @@ lp->tx_len = lp->exec_box->data[9]; /* Transmit list count */ lp->rx_len = lp->exec_box->data[11]; /* Receive list count */ - init_waitqueue_head(&lp->event); + init_MUTEX_LOCKED(&lp->cmd_mutex); + init_completion(&lp->execution_cmd); + init_completion(&lp->xceiver_cmd); printk("%s: Firmware Rev %d. %d RX buffers, %d TX buffers. Base of 0x%08X.\n", dev->name, lp->exec_box->data[12], lp->rx_len, lp->tx_len, lp->base); @@ -510,10 +516,6 @@ dev->watchdog_timeo = HZ*5; /* Board does all the work */ dev->ethtool_ops = &netdev_ethtool_ops; - lp->xceiver_state = HALTED; - - lp->tx_ring_tail=lp->tx_ring_head=0; - /* Fill in the fields of the device structure with ethernet values. */ ether_setup(dev); @@ -537,7 +539,7 @@ * status of any pending commands and takes very little time at all. */ -static void mc32_ready_poll(struct net_device *dev) +static inline void mc32_ready_poll(struct net_device *dev) { int ioaddr = dev->base_addr; while(!(inb(ioaddr+HOST_STATUS)&HOST_STATUS_CRR)); @@ -552,31 +554,36 @@ * @len: Length of the data block * * Send a command from interrupt state. If there is a command - * currently being executed then we return an error of -1. It simply - * isn't viable to wait around as commands may be slow. Providing we - * get in, we busy wait for the board to become ready to accept the - * command and issue it. We do not wait for the command to complete - * --- the card will interrupt us when it's done. + * currently being executed then we return an error of -1. It + * simply isn't viable to wait around as commands may be + * slow. This can theoretically be starved on SMP, but it's hard + * to see a realistic situation. We do not wait for the command + * to complete --- we rely on the interrupt handler to tidy up + * after us. */ static int mc32_command_nowait(struct net_device *dev, u16 cmd, void *data, int len) { struct mc32_local *lp = (struct mc32_local *)dev->priv; int ioaddr = dev->base_addr; - - if(lp->exec_pending) - return -1; + int ret = -1; - lp->exec_pending=3; - lp->exec_box->mbox=0; - lp->exec_box->mbox=cmd; - memcpy((void *)lp->exec_box->data, data, len); - barrier(); /* the memcpy forgot the volatile so be sure */ + if (down_trylock(&lp->cmd_mutex) == 0) + { + lp->cmd_nonblocking=1; + lp->exec_box->mbox=0; + lp->exec_box->mbox=cmd; + memcpy((void *)lp->exec_box->data, data, len); + barrier(); /* the memcpy forgot the volatile so be sure */ + + /* Send the command */ + mc32_ready_poll(dev); + outb(1<<6, ioaddr+HOST_CMD); - /* Send the command */ - while(!(inb(ioaddr+HOST_STATUS)&HOST_STATUS_CRR)); - outb(1<<6, ioaddr+HOST_CMD); - return 0; + ret = 0; + /* Interrupt handler will signal mutex on completion */ + } + return ret; } @@ -590,7 +597,7 @@ * Sends exec commands in a user context. This permits us to wait around * for the replies and also to wait for the command buffer to complete * from a previous command before we execute our command. After our - * command completes we will complete any pending multicast reload + * command completes we will attempt any pending multicast reload * we blocked off by hogging the exec buffer. * * You feed the card a command, you wait, it interrupts you get a @@ -598,66 +605,37 @@ * commands for filter list changes which come in at bh level from things * like IPV6 group stuff. * - * We have a simple state machine - * - * 0 - nothing issued - * - * 1 - command issued, wait reply - * - * 2 - reply waiting - reader then goes to state 0 - * - * 3 - command issued, trash reply. In which case the irq - * takes it back to state 0 - * */ static int mc32_command(struct net_device *dev, u16 cmd, void *data, int len) { struct mc32_local *lp = (struct mc32_local *)dev->priv; int ioaddr = dev->base_addr; - unsigned long flags; int ret = 0; + + down(&lp->cmd_mutex); /* - * Wait for a command - */ - - save_flags(flags); - cli(); - - while(lp->exec_pending) - sleep_on(&lp->event); - - /* - * Issue mine + * My turn */ - lp->exec_pending=1; - - restore_flags(flags); - + lp->cmd_nonblocking=0; lp->exec_box->mbox=0; lp->exec_box->mbox=cmd; memcpy((void *)lp->exec_box->data, data, len); barrier(); /* the memcpy forgot the volatile so be sure */ - /* Send the command */ - while(!(inb(ioaddr+HOST_STATUS)&HOST_STATUS_CRR)); - outb(1<<6, ioaddr+HOST_CMD); - - save_flags(flags); - cli(); + mc32_ready_poll(dev); + outb(1<<6, ioaddr+HOST_CMD); - while(lp->exec_pending!=2) - sleep_on(&lp->event); - lp->exec_pending=0; - restore_flags(flags); + wait_for_completion(&lp->execution_cmd); if(lp->exec_box->mbox&(1<<13)) ret = -1; + up(&lp->cmd_mutex); /* - * A multicast set got blocked - do it now + * A multicast set got blocked - try it now */ if(lp->mc_reload_wait) @@ -676,10 +654,8 @@ * This may be called from the interrupt state, where it is used * to restart the rx ring if the card runs out of rx buffers. * - * First, we check if it's ok to start the transceiver. We then show - * the card where to start in the rx ring and issue the - * commands to start reception and transmission. We don't wait - * around for these to complete. + * We must first check if it's ok to (re)start the transceiver. See + * mc32_close for details. */ static void mc32_start_transceiver(struct net_device *dev) { @@ -688,24 +664,20 @@ int ioaddr = dev->base_addr; /* Ignore RX overflow on device closure */ - if (lp->desired_state==HALTED) + if (lp->xceiver_desired_state==HALTED) return; + /* Give the card the offset to the post-EOL-bit RX descriptor */ mc32_ready_poll(dev); - - lp->tx_box->mbox=0; lp->rx_box->mbox=0; - - /* Give the card the offset to the post-EOL-bit RX descriptor */ lp->rx_box->data[0]=lp->rx_ring[prev_rx(lp->rx_ring_tail)].p->next; - outb(HOST_CMD_START_RX, ioaddr+HOST_CMD); mc32_ready_poll(dev); + lp->tx_box->mbox=0; outb(HOST_CMD_RESTRT_TX, ioaddr+HOST_CMD); /* card ignores this on RX restart */ /* We are not interrupted on start completion */ - lp->xceiver_state=RUNNING; } @@ -725,24 +697,18 @@ { struct mc32_local *lp = (struct mc32_local *)dev->priv; int ioaddr = dev->base_addr; - unsigned long flags; mc32_ready_poll(dev); - lp->tx_box->mbox=0; lp->rx_box->mbox=0; outb(HOST_CMD_SUSPND_RX, ioaddr+HOST_CMD); + wait_for_completion(&lp->xceiver_cmd); + mc32_ready_poll(dev); + lp->tx_box->mbox=0; outb(HOST_CMD_SUSPND_TX, ioaddr+HOST_CMD); - - save_flags(flags); - cli(); - - while(lp->xceiver_state!=HALTED) - sleep_on(&lp->event); - - restore_flags(flags); + wait_for_completion(&lp->xceiver_cmd); } @@ -754,7 +720,7 @@ * the point where mc32_start_transceiver() can be called. * * The card sets up the receive ring for us. We are required to use the - * ring it provides although we can change the size of the ring. + * ring it provides, although the size of the ring is configurable. * * We allocate an sk_buff for each ring entry in turn and * initalise its house-keeping info. At the same time, we read @@ -775,7 +741,7 @@ rx_base=lp->rx_chain; - for(i=0;i<RX_RING_LEN;i++) + for(i=0; i<RX_RING_LEN; i++) { lp->rx_ring[i].skb=alloc_skb(1532, GFP_KERNEL); skb_reserve(lp->rx_ring[i].skb, 18); @@ -812,21 +778,20 @@ * * Free the buffer for each ring slot. This may be called * before mc32_load_rx_ring(), eg. on error in mc32_open(). + * Requires rx skb pointers to point to a valid skb, or NULL. */ static void mc32_flush_rx_ring(struct net_device *dev) { struct mc32_local *lp = (struct mc32_local *)dev->priv; - struct sk_buff *skb; int i; for(i=0; i < RX_RING_LEN; i++) - { - skb = lp->rx_ring[i].skb; - if (skb!=NULL) { - kfree_skb(skb); - skb=NULL; + { + if (lp->rx_ring[i].skb) { + dev_kfree_skb(lp->rx_ring[i].skb); + lp->rx_ring[i].skb = NULL; } lp->rx_ring[i].p=NULL; } @@ -858,7 +823,7 @@ tx_base=lp->tx_box->data[0]; - for(i=0;i<lp->tx_len;i++) + for(i=0 ; i<TX_RING_LEN ; i++) { p=isa_bus_to_virt(lp->base+tx_base); lp->tx_ring[i].p=p; @@ -868,8 +833,8 @@ } /* -1 so that tx_ring_head cannot "lap" tx_ring_tail, */ - /* which would be bad news for mc32_tx_ring as cur. implemented */ - + /* see mc32_tx_ring */ + atomic_set(&lp->tx_count, TX_RING_LEN-1); lp->tx_ring_head=lp->tx_ring_tail=0; } @@ -878,45 +843,27 @@ /** * mc32_flush_tx_ring - free transmit ring * @lp: Local data of 3c527 to flush the tx ring of - * - * We have to consider two cases here. We want to free the pending - * buffers only. If the ring buffer head is past the start then the - * ring segment we wish to free wraps through zero. The tx ring - * house-keeping variables are then reset. + * + * If the ring is non-empty, zip over the it, freeing any + * allocated skb_buffs. The tx ring house-keeping variables are + * then reset. Requires rx skb pointers to point to a valid skb, + * or NULL. */ static void mc32_flush_tx_ring(struct net_device *dev) { struct mc32_local *lp = (struct mc32_local *)dev->priv; + + int i; - if(lp->tx_ring_tail!=lp->tx_ring_head) + for (i=0; i < TX_RING_LEN; i++) { - int i; - if(lp->tx_ring_tail < lp->tx_ring_head) - { - for(i=lp->tx_ring_tail;i<lp->tx_ring_head;i++) - { - dev_kfree_skb(lp->tx_ring[i].skb); - lp->tx_ring[i].skb=NULL; - lp->tx_ring[i].p=NULL; - } - } - else + if (lp->tx_ring[i].skb) { - for(i=lp->tx_ring_tail; i<TX_RING_LEN; i++) - { - dev_kfree_skb(lp->tx_ring[i].skb); - lp->tx_ring[i].skb=NULL; - lp->tx_ring[i].p=NULL; - } - for(i=0; i<lp->tx_ring_head; i++) - { - dev_kfree_skb(lp->tx_ring[i].skb); - lp->tx_ring[i].skb=NULL; - lp->tx_ring[i].p=NULL; - } - } - } + dev_kfree_skb(lp->tx_ring[i].skb); + lp->tx_ring[i].skb = NULL; + } + } atomic_set(&lp->tx_count, 0); lp->tx_ring_tail=lp->tx_ring_head=0; @@ -955,9 +902,14 @@ regs=inb(ioaddr+HOST_CTRL); regs|=HOST_CTRL_INTE; outb(regs, ioaddr+HOST_CTRL); - /* + * Allow ourselves to issue commands + */ + + up(&lp->cmd_mutex); + + /* * Send the indications on command */ @@ -1008,7 +960,7 @@ return -ENOBUFS; } - lp->desired_state = RUNNING; + lp->xceiver_desired_state = RUNNING; /* And finally, set the ball rolling... */ mc32_start_transceiver(dev); @@ -1045,61 +997,64 @@ * Transmit a buffer. This normally means throwing the buffer onto * the transmit queue as the queue is quite large. If the queue is * full then we set tx_busy and return. Once the interrupt handler - * gets messages telling it to reclaim transmit queue entries we will + * gets messages telling it to reclaim transmit queue entries, we will * clear tx_busy and the kernel will start calling this again. * - * We use cli rather than spinlocks. Since I have no access to an SMP - * MCA machine I don't plan to change it. It is probably the top - * performance hit for this driver on SMP however. + * We do not disable interrupts or acquire any locks; this can + * run concurrently with mc32_tx_ring(), and the function itself + * is serialised at a higher layer. However, this makes it + * crucial that we update lp->tx_ring_head only after we've + * established a valid packet in the tx ring (and is why we mark + * tx_ring_head volatile). */ static int mc32_send_packet(struct sk_buff *skb, struct net_device *dev) { struct mc32_local *lp = (struct mc32_local *)dev->priv; - unsigned long flags; - + + u16 head = lp->tx_ring_head; + volatile struct skb_header *p, *np; netif_stop_queue(dev); - save_flags(flags); - cli(); - - if(atomic_read(&lp->tx_count)==0) - { - restore_flags(flags); + if(atomic_read(&lp->tx_count)==0) { return 1; } + skb = skb_padto(skb, ETH_ZLEN); + if (skb == NULL) { + netif_wake_queue(dev); + return 0; + } + atomic_dec(&lp->tx_count); /* P is the last sending/sent buffer as a pointer */ - p=lp->tx_ring[lp->tx_ring_head].p; - - lp->tx_ring_head=next_tx(lp->tx_ring_head); - + p=lp->tx_ring[head].p; + + head = next_tx(head); + /* NP is the buffer we will be loading */ - np=lp->tx_ring[lp->tx_ring_head].p; + np=lp->tx_ring[head].p; + + /* We will need this to flush the buffer out */ + lp->tx_ring[head].skb=skb; - if (skb->len < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) - goto out; - } + np->length = unlikely(skb->len < ETH_ZLEN) ? ETH_ZLEN : skb->len; - /* We will need this to flush the buffer out */ - lp->tx_ring[lp->tx_ring_head].skb = skb; - - np->length = (skb->len < ETH_ZLEN) ? ETH_ZLEN : skb->len; - np->data = isa_virt_to_bus(skb->data); np->status = 0; np->control = CONTROL_EOP | CONTROL_EOL; wmb(); - - p->control &= ~CONTROL_EOL; /* Clear EOL on p */ -out: - restore_flags(flags); + + /* + * The new frame has been setup; we can now + * let the card and interrupt handler "see" it + */ + + p->control &= ~CONTROL_EOL; + lp->tx_ring_head= head; netif_wake_queue(dev); return 0; @@ -1180,11 +1135,11 @@ { struct mc32_local *lp=dev->priv; volatile struct skb_header *p; - u16 rx_ring_tail = lp->rx_ring_tail; - u16 rx_old_tail = rx_ring_tail; - + u16 rx_ring_tail; + u16 rx_old_tail; int x=0; - + + rx_old_tail = rx_ring_tail = lp->rx_ring_tail; do { p=lp->rx_ring[rx_ring_tail].p; @@ -1273,8 +1228,13 @@ struct mc32_local *lp=(struct mc32_local *)dev->priv; volatile struct skb_header *np; - /* NB: lp->tx_count=TX_RING_LEN-1 so that tx_ring_head cannot "lap" tail here */ - + /* + * We rely on head==tail to mean 'queue empty'. + * This is why lp->tx_count=TX_RING_LEN-1: in order to prevent + * tx_ring_head wrapping to tail and confusing a 'queue empty' + * condition with 'queue full' + */ + while (lp->tx_ring_tail != lp->tx_ring_head) { u16 t; @@ -1386,8 +1346,7 @@ break; case 3: /* Halt */ case 4: /* Abort */ - lp->xceiver_state |= TX_HALTED; - wake_up(&lp->event); + complete(&lp->xceiver_cmd); break; default: printk("%s: strange tx ack %d\n", dev->name, status&7); @@ -1402,8 +1361,7 @@ break; case 3: /* Halt */ case 4: /* Abort */ - lp->xceiver_state |= RX_HALTED; - wake_up(&lp->event); + complete(&lp->xceiver_cmd); break; case 6: /* Out of RX buffers stat */ @@ -1419,26 +1377,17 @@ status>>=3; if(status&1) { + /* + * No thread is waiting: we need to tidy + * up ourself. + */ - /* 0=no 1=yes 2=replied, get cmd, 3 = wait reply & dump it */ - - if(lp->exec_pending!=3) { - lp->exec_pending=2; - wake_up(&lp->event); - } - else - { - lp->exec_pending=0; - - /* A new multicast set may have been - blocked while the old one was - running. If so, do it now. */ - + if (lp->cmd_nonblocking) { + up(&lp->cmd_mutex); if (lp->mc_reload_wait) mc32_reset_multicast_list(dev); - else - wake_up(&lp->event); } + else complete(&lp->execution_cmd); } if(status&2) { @@ -1496,7 +1445,7 @@ u8 regs; u16 one=1; - lp->desired_state = HALTED; + lp->xceiver_desired_state = HALTED; netif_stop_queue(dev); /* @@ -1508,12 +1457,10 @@ /* Shut down the transceiver */ mc32_halt_transceiver(dev); + + /* Ensure we issue no more commands beyond this point */ + down(&lp->cmd_mutex); - /* Catch any waiting commands */ - - while(lp->exec_pending==1) - sleep_on(&lp->event); - /* Ok the card is now stopping */ regs=inb(ioaddr+HOST_CTRL); @@ -1540,12 +1487,10 @@ static struct net_device_stats *mc32_get_stats(struct net_device *dev) { - struct mc32_local *lp; + struct mc32_local *lp = (struct mc32_local *)dev->priv;; mc32_update_stats(dev); - lp = (struct mc32_local *)dev->priv; - return &lp->net_stats; }