Hi, the following problems exist with the bonding driver (drivers/net/bonding.c) in 2.2.16 and 2.2.17-pre: 1. Missing entry in drivers/net/Space.c makes the driver inaccessible when not compiled as module. 2. Bug in netif_rx() in net/core/dev.c causes received packets to be lost when there is a backlog, this is valid not only for the bonding driver but for all drivers using IFF_SLAVE (the master device usually has no queue, only the slaves have them, due to the bug the received packed would be appended to the nonexisting master device queue). This problem will fully show up when bonding 100MBit devices, causing the bonded data rate to drop to 60-70Kbytes/s. The fixes for the above problems are attached as bonding-fix.diff and are fully tested. 3. The bonding driver will loop when packets are to be transmitted and no slaves are attached causing the system to freeze. This is especially bad as the bonding device has to be up to attach slaves, so this has a certain probability to occur. The driver has bugs in bond_close() that may hang the system (not fully releasing slaves, possible interrupt race problems). It might hang the system in bond_open(). The fixes for the bonding driver are attached as bonding-changes.diff. Please note that they are not yet tested (not urgent for me, my servers tend to be up all the time). All patches are against 2.2.16. A final word for all who want to test 100MBit channel bonding: use fast systems! The maximum throughput with 2 bonded cards @ 100% cpu load is about 18Mbytes/s between two PIII-500 Systems (rough measurement with ftp get and top).
--- linux/drivers/net/bonding.c Sat Jul 15 10:24:35 2000 +++ linux-custom/drivers/net/bonding.c Sat Jul 15 10:43:44 2000 @@ -71,8 +71,12 @@ bonding_t *private = (struct bonding *) master->priv; slave_queue_t *queue = (struct slave_queue *) private->queue; slave_t *slave, *next; + int flags; + + save_flags(flags); + cli(); - for( slave=queue->head; slave != NULL; slave=next) { + for( slave=queue->head; slave != NULL; ) { #ifdef BONDING_DEBUG printk("freeing = %s\n", slave->dev->name); #endif @@ -80,9 +84,12 @@ slave->dev->slave = NULL; next = slave->next; kfree(slave); - queue->num_slaves++; + slave=next; + queue->num_slaves--; } + restore_flags(flags); + MOD_DEC_USE_COUNT; return 0; } @@ -121,14 +128,15 @@ return -EBUSY; } - slave->slave = master; /* save the master in slave->slave */ - slave->flags |= IFF_SLAVE; - if ((new_slave = kmalloc(sizeof(slave_t), GFP_KERNEL)) == NULL) { + restore_flags(flags); return -ENOMEM; } memset(new_slave, 0, sizeof(slave_t)); + slave->slave = master; /* save the master in slave->slave */ + slave->flags |= IFF_SLAVE; + new_slave->dev = slave; if (queue->head == NULL) { @@ -240,6 +248,11 @@ struct slave_queue *queue = bond->queue; int good = 0; + if(!queue->num_slaves) { + dev_kfree_skb(skb); + return 0; + } + while (good == 0) { slave = queue->current_slave->dev; if (slave->flags & (IFF_UP|IFF_RUNNING)) {
diff -rNu linux/drivers/net/Space.c linux-custom/drivers/net/Space.c --- linux/drivers/net/Space.c Wed Jun 7 23:26:43 2000 +++ linux-custom/drivers/net/Space.c Sat Jul 15 11:19:45 2000 @@ -106,6 +106,7 @@ extern int ncr885e_probe(struct device *); extern int cs89x0_probe(struct device *dev); extern int ethertap_probe(struct device *dev); +extern int bond_init(struct device *dev); extern int ether1_probe (struct device *dev); extern int ether3_probe (struct device *dev); extern int etherh_probe (struct device *dev); @@ -631,6 +632,12 @@ static struct device tap0_dev = { "tap0", 0, 0, 0, 0, NETLINK_TAPBASE, 0, 0 , 0, 0, NEXT_DEV, ethertap_probe, }; # undef NEXT_DEV # define NEXT_DEV (&tap0_dev) +#endif + +#ifdef CONFIG_BONDING +static struct device bonding_dev = { "bond0", 0, 0, 0, 0, 0, 0, 0, 0, 0, NEXT_D EV, bond_init, }; +# undef NEXT_DEV +# define NEXT_DEV (&bonding_dev) #endif #ifdef CONFIG_LANMEDIA diff -rNu linux/net/core/dev.c linux-custom/net/core/dev.c --- linux/net/core/dev.c Wed Jun 7 23:26:44 2000 +++ linux-custom/net/core/dev.c Sat Jul 15 11:18:37 2000 @@ -773,6 +773,10 @@ if (backlog.qlen <= netdev_max_backlog) { if (backlog.qlen) { if (netdev_dropping == 0) { + if (skb->dev->flags & IFF_SLAVE && + skb->dev->slave) { + skb->dev = skb->dev->slave; + } skb_queue_tail(&backlog,skb); mark_bh(NET_BH); return;