Re: Pb with bonding since 2.4.19

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Fabien Salvi wrote:
> 
> Bernd Eckenfels wrote:
> >
> > In article <3D5140D0.813D27A9@cri74.org> you wrote:
> > > Almost everything works fine with a 2.4.18 kernel
> > > There are some minor problems with media status that can "segfault"
> > > ifconfig
> >
> > can you provide me with more info, so i can fix this? What was the
> > segfaulting command line?
> 
> Hello,
> The problem occurs with the 2.4.18 kernel; I can't say whether it's also
> the case in 2.4.19 because the kernel freezes before I can try this :)
> 
> But, according to Chad Tindel, it should have been fixed in the latest
> patches:
> ftp://ftp.sourceforge.net/pub/sourceforge/bonding/bonding-2.4.18-20020729
> 
> If I'm not wrong, the problem is about media reporting, which not all
> network drivers support.
> I forgot to say that it freezes ifconfig version 1.42.
> With an older version, for example 1.39, I get this:
> 
> eth1      Link encap:Ethernet  HWaddr 00:50:DA:33:BE:50  Media:net-tools
> 1.54
>                inet addr:192.168.0.6  Bcast:192.168.0.127
> Mask:255.255.255.128
>                UP BROADCAST RUNNING PROMISC ALLMULTI SLAVE MULTICAST
> MTU:1500  Metric:1
>                RX packets:1309 errors:0 dropped:0 overruns:0 frame:0
>                TX packets:623 errors:0 dropped:0 overruns:0 carrier:0
>                collisions:0 txqueuelen:100
>                Interrupt:9 Base address:0x4400
> 
> You can notice that the Media value is very strange...
> 
> But the main problem is not this, it's the 2.4.19 freeze with bonding.


Thanks to Chad Tindel's help, I've tried the latest bonding patches for
2.4.18:
ftp://ftp.sourceforge.net//pub/sourceforge/bonding/bonding-2.4.18-20020729

And it works fine; it corrected the freeze problems that the previous
patch (20020617) had.

So I ported the 2.4.18 patch to 2.4.19.
The only file changed is drivers/net/bonding.c

I have attached the correct patch for 2.4.19 to this email.
(md5sum : baae6a5551bb2cbbc1c2beaccb52790a)

I've carefully tested it and it solves all problems...


B.R.

-------------
Fabien SALVI      Centre de Ressources Informatiques
                  Archamps, France -- http://www.cri74.org
                  PingOO GNU/linux distribution : http://www.pingoo.org
diff -Naur linux-2.4.19_orig/drivers/net/bonding.c linux-2.4.19/drivers/net/bonding.c
--- linux-2.4.19_orig/drivers/net/bonding.c	Sat Aug  3 02:39:44 2002
+++ linux-2.4.19/drivers/net/bonding.c	Thu Aug  8 11:58:09 2002
@@ -176,6 +176,16 @@
  *              Steve Mead <steve.mead at comdev dot cc>
  *     - Port Gleb Natapov's multicast support patchs from 2.4.12
  *       to 2.4.18 adding support for multicast.
+ *
+ * 2002/06/17 - Tony Cureington <tony.cureington * hp_com>
+ *     - corrected uninitialized pointer (ifr.ifr_data) in bond_check_dev_link;
+ *       actually changed function to use ETHTOOL, then MIIPHY, and finally
+ *       MIIREG to determine the link status
+ *     - fixed bad ifr_data pointer assignments in bond_ioctl
+ *     - corrected mode 1 being reported as active-backup in bond_get_info;
+ *       also added text to distinguish type of load balancing (rr or xor)
+ *     - change arp_ip_target module param from "1-12s" (array of 12 ptrs)
+ *       to "s" (a single ptr)
  */
 
 #include <linux/config.h>
@@ -210,6 +220,9 @@
 #include <linux/smp.h>
 #include <linux/if_ether.h>
 #include <linux/if_arp.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+
 
 /* monitor all links that often (in milliseconds). <=0 disables monitoring */
 #ifndef BOND_LINK_MON_INTERV
@@ -253,7 +266,7 @@
 MODULE_PARM(mode, "i");
 MODULE_PARM(arp_interval, "i");
 MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
-MODULE_PARM(arp_ip_target, "1-12s");
+MODULE_PARM(arp_ip_target, "s");
 MODULE_PARM_DESC(arp_ip_target, "arp target in n.n.n.n form");
 MODULE_PARM_DESC(mode, "Mode of operation : 0 for round robin, 1 for active-backup, 2 for xor");
 MODULE_PARM(updelay, "i");
@@ -386,21 +399,54 @@
 {
 	static int (* ioctl)(struct net_device *, struct ifreq *, int);
 	struct ifreq ifr;
-	u16 *data = (u16 *)&ifr.ifr_data;
-		
-	/* data[0] automagically filled by the ioctl */
-	data[1] = 1; /* MII location 1 reports Link Status */
+	struct mii_ioctl_data *mii;
+	struct ethtool_value etool;
+
+	if ((ioctl = dev->do_ioctl) != NULL)  { /* ioctl to access MII */
+		/* TODO: set pointer to correct ioctl on a per team member */
+		/*       bases to make this more efficient. that is, once  */
+		/*       we determine the correct ioctl, we will always    */
+		/*       call it and not the others for that team          */
+		/*       member.                                           */
+
+		/* try SIOCETHTOOL ioctl, some drivers cache ETHTOOL_GLINK */
+		/* for a period of time; we need to encourage link status  */
+		/* be reported by network drivers in real time; if the     */
+		/* value is cached, the mmimon module parm may have no     */
+		/* effect...                                               */
+	        etool.cmd = ETHTOOL_GLINK;
+	        ifr.ifr_data = (char*)&etool;
+		if (ioctl(dev, &ifr, SIOCETHTOOL) == 0) {
+			if (etool.data == 1) {
+				return(MII_LINK_READY);
+			} 
+			else { 
+				return(0);
+			} 
+		}
+
+		/*
+		 * We cannot assume that SIOCGMIIPHY will also read a
+		 * register; not all network drivers support that.
+		 */
+
+		/* Yes, the mii is overlaid on the ifreq.ifr_ifru */
+		mii = (struct mii_ioctl_data *)&ifr.ifr_data;
+		if (ioctl(dev, &ifr, SIOCGMIIPHY) != 0) {
+			return MII_LINK_READY;	 /* can't tell */
+		}
+
+		mii->reg_num = 1;
+		if (ioctl(dev, &ifr, SIOCGMIIREG) == 0) {
+			/*
+			 * mii->val_out contains MII reg 1, BMSR
+			 * 0x0004 means link established
+			 */
+			return mii->val_out;
+		}
 
-	if (((ioctl = dev->do_ioctl) != NULL) &&  /* ioctl to access MII */
-	    (ioctl(dev, &ifr, SIOCGMIIPHY) == 0)) {
-		/* now, data[3] contains info about link status :
-		   - data[3] & 0x04 means link up
-		   - data[3] & 0x20 means end of auto-negociation
-		*/
-		return data[3];
-	} else {
-		return MII_LINK_READY;  /* spoof link up ( we can't check it) */
 	}
+	return MII_LINK_READY;  /* spoof link up ( we can't check it) */
 }
 
 static u16 bond_check_mii_link(bonding_t *bond)
@@ -598,12 +644,6 @@
 	 */
 	write_lock_irqsave(&bond->lock, flags);
 
-	/*
-	 * Lock the master device so that noone trys to transmit
-	 * while we're changing things
-	 */
-	spin_lock_bh(&master->xmit_lock);
-
 	/* set promiscuity flag to slaves */
 	if ( (master->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC) )
 		bond_set_promiscuity(bond, 1); 
@@ -637,7 +677,6 @@
 	bond_mc_list_destroy (bond);
 	bond_mc_list_copy (master->mc_list, bond, GFP_KERNEL);
 
-	spin_unlock_bh(&master->xmit_lock);
 	write_unlock_irqrestore(&bond->lock, flags);
 }
 
@@ -1707,7 +1746,7 @@
 
 	switch (cmd) {
 	case SIOCGMIIPHY:
-		data = (u16 *)&ifr->ifr_data;
+		data = (u16 *)ifr->ifr_data;
 		if (data == NULL) {
 			return -EINVAL;
 		}
@@ -1718,7 +1757,7 @@
 		 * We do this again just in case we were called by SIOCGMIIREG
 		 * instead of SIOCGMIIPHY.
 		 */
-		data = (u16 *)&ifr->ifr_data;
+		data = (u16 *)ifr->ifr_data;
 		if (data == NULL) {
 			return -EINVAL;
 		}
@@ -2035,7 +2074,28 @@
 		link = bond_check_mii_link(bond);
 
 		len += sprintf(buf + len, "Bonding Mode: ");
-		len += sprintf(buf + len, "%s\n", mode ? "active-backup" : "load balancing");
+
+		switch (mode) {
+			case BOND_MODE_ACTIVEBACKUP:
+				len += sprintf(buf + len, "%s\n", 
+						"active-backup");
+			break;
+
+			case BOND_MODE_ROUNDROBIN:
+				len += sprintf(buf + len, "%s\n", 
+						"load balancing (round-robin)");
+			break;
+
+			case BOND_MODE_XOR:
+				len += sprintf(buf + len, "%s\n", 
+						"load balancing (xor)");
+			break;
+
+			default:
+				len += sprintf(buf + len, "%s\n", 
+						"unknown");
+			break;
+		}
 
 		if (mode == BOND_MODE_ACTIVEBACKUP) {
 			read_lock_irqsave(&bond->lock, flags);
@@ -2138,7 +2198,9 @@
 }
 
 static struct notifier_block bond_netdev_notifier = {
-	notifier_call: bond_event,
+	bond_event,
+	NULL,
+	0
 };
 
 static int __init bond_init(struct net_device *dev)
@@ -2282,7 +2344,32 @@
 	}
 	memset(dev_bonds, 0, max_bonds*sizeof(struct net_device));
 
+	if (updelay < 0) {
+		printk(KERN_WARNING 
+		       "bonding_init(): updelay module parameter (%d), "
+		       "not in range 0-%d, so it was reset to 0\n",
+		       updelay, INT_MAX);
+		updelay = 0;
+	}
+
+	if (downdelay < 0) {
+		printk(KERN_WARNING 
+		       "bonding_init(): downdelay module parameter (%d), "
+		       "not in range 0-%d, so it was reset to 0\n",
+		       downdelay, INT_MAX);
+		downdelay = 0;
+	}
+
+	if (arp_interval < 0) {
+		printk(KERN_WARNING 
+		       "bonding_init(): arp_interval module parameter (%d), "
+		       "not in range 0-%d, so it was reset to %d\n",
+		       arp_interval, INT_MAX, BOND_LINK_ARP_INTERV);
+		arp_interval = BOND_LINK_ARP_INTERV;
+	}
+
 	if (arp_ip_target) {
+		/* TODO: check and log bad ip address */
 		if (my_inet_aton(arp_ip_target, &arp_target) == 0)  {
 			arp_interval = 0;
 		}

[Index of Archives]     [Netdev]     [Ethernet Bridging]     [Linux 802.1Q VLAN]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Git]     [Bugtraq]     [Yosemite News and Information]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux PCI]     [Linux Admin]     [Samba]

  Powered by Linux