This patch fixes a number of issues with the Ethernet bridging code in 2.5. 1. Locking deadlock between the add/delete interface path and the add/delete bridge path. Problem was that one path got the rtnl_lock then the ioctl_mutex, and the other used the opposite order. The fix for this was to take out the code path based locking (using ioctl_mutex) and use the existing br->lock rwlock and add another spinlock for the list of bridges. 2. Notify code has a boolean test backwards so it would never notify interface going down. 3. Ioctl and notify code was too lax about getting br->lock when necessary. Either no locking was done or reader lock was acquired when writer lock was necessary. 4. Add C99 initializer for notifier event This does not depend on my earlier the br_lock removal patches. Testing with a parallel set of scripts that up/down, add/remove on 8-way SMP system now works; before it would hang after 30 seconds. # This is a BitKeeper generated patch for the following project: # Project Name: Linux kernel tree # This patch format is intended for GNU patch command version 2.5 or higher. # This patch includes the following deltas: # ChangeSet 1.1143 -> 1.1144 # net/bridge/br_notify.c 1.1 -> 1.2 # net/bridge/br_ioctl.c 1.2 -> 1.3 # net/bridge/br.c 1.9 -> 1.10 # net/bridge/br_if.c 1.6 -> 1.7 # # The following is the BitKeeper ChangeSet Log # -------------------------------------------- # 03/03/20 shemminger@dev4-009.pdx.osdl.net 1.1144 # Replace ioctl_mutex with better locking # -------------------------------------------- # diff -Nru a/net/bridge/br.c b/net/bridge/br.c --- a/net/bridge/br.c Thu Mar 20 11:29:45 2003 +++ b/net/bridge/br.c Thu Mar 20 11:29:45 2003 @@ -60,18 +60,13 @@ return 0; } -static void __br_clear_ioctl_hook(void) -{ - br_ioctl_hook = NULL; -} - static void __exit br_deinit(void) { #ifdef CONFIG_NETFILTER br_netfilter_fini(); #endif unregister_netdevice_notifier(&br_device_notifier); - br_call_ioctl_atomic(__br_clear_ioctl_hook); + br_ioctl_hook = NULL; br_write_lock_bh(BR_NETPROTO_LOCK); br_handle_frame_hook = NULL; diff -Nru a/net/bridge/br_if.c b/net/bridge/br_if.c --- a/net/bridge/br_if.c Thu Mar 20 11:29:45 2003 +++ b/net/bridge/br_if.c Thu Mar 20 11:29:45 2003 @@ -23,6 +23,7 @@ #include "br_private.h" static struct net_bridge *bridge_list; +static spinlock_t bridge_lock = SPIN_LOCK_UNLOCKED; static int br_initial_port_cost(struct net_device *dev) { @@ -69,6 +70,7 @@ return 0; } +/* called with bridge_lock */ static struct net_bridge **__find_br(char *name) { struct net_bridge **b; @@ -188,8 +190,10 @@ return -EEXIST; } + spin_lock(&bridge_lock); br->next = bridge_list; bridge_list = br; + spin_unlock(&bridge_lock); br_inc_use_count(); register_netdev(&br->dev); @@ -202,17 +206,22 @@ struct net_bridge **b; struct net_bridge *br; - if ((b = __find_br(name)) == NULL) + spin_lock(&bridge_lock); + if ((b = __find_br(name)) == NULL) { + spin_unlock(&bridge_lock); return -ENXIO; + } br = *b; - - if (br->dev.flags & IFF_UP) + if (br->dev.flags & IFF_UP) { + spin_unlock(&bridge_lock); return -EBUSY; - - del_ifs(br); + } *b = br->next; + spin_unlock(&bridge_lock); + + del_ifs(br); unregister_netdev(&br->dev); kfree(br); @@ -272,6 +281,7 @@ struct net_bridge *br; int i; + spin_lock(&bridge_lock); br = bridge_list; for (i=0;i<num;i++) { if (br == NULL) @@ -280,6 +290,7 @@ indices[i] = br->dev.ifindex; br = br->next; } + spin_unlock(&bridge_lock); return i; } @@ -289,9 +300,11 @@ { struct net_bridge_port *p; + read_lock(&br->lock); p = br->port_list; while (p != NULL) { ifindices[p->port_no] = p->dev->ifindex; p = p->next; } + read_unlock(&br->lock); } diff -Nru a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c --- a/net/bridge/br_ioctl.c Thu Mar 20 11:29:45 2003 +++ b/net/bridge/br_ioctl.c Thu Mar 20 11:29:45 2003 @@ -53,6 +53,7 @@ { struct __bridge_info b; + read_lock(&br->lock); memset(&b, 0, sizeof(struct __bridge_info)); memcpy(&b.designated_root, &br->designated_root, 8); memcpy(&b.bridge_id, &br->bridge_id, 8); @@ -73,6 +74,7 @@ b.tcn_timer_value = br_timer_get_residue(&br->tcn_timer); b.topology_change_timer_value = br_timer_get_residue(&br->topology_change_timer); b.gc_timer_value = br_timer_get_residue(&br->gc_timer); + read_unlock(&br->lock); if (copy_to_user((void *)arg0, &b, sizeof(b))) return -EFAULT; @@ -96,21 +98,27 @@ } case BRCTL_SET_BRIDGE_FORWARD_DELAY: + write_lock(&br->lock); br->bridge_forward_delay = arg0; if (br_is_root_bridge(br)) br->forward_delay = arg0; + write_unlock(&br->lock); return 0; case BRCTL_SET_BRIDGE_HELLO_TIME: + write_lock(&br->lock); br->bridge_hello_time = arg0; if (br_is_root_bridge(br)) br->hello_time = arg0; + write_unlock(&br->lock); return 0; case BRCTL_SET_BRIDGE_MAX_AGE: + write_lock(&br->lock); br->bridge_max_age = arg0; if (br_is_root_bridge(br)) br->max_age = arg0; + write_unlock(&br->lock); return 0; case BRCTL_SET_AGEING_TIME: @@ -126,6 +134,7 @@ struct __port_info p; struct net_bridge_port *pt; + read_lock(&br->lock); if ((pt = br_get_port(br, arg1)) == NULL) return -EINVAL; @@ -143,6 +152,8 @@ p.forward_delay_timer_value = br_timer_get_residue(&pt->forward_delay_timer); p.hold_timer_value = br_timer_get_residue(&pt->hold_timer); + read_unlock(&br->lock); + if (copy_to_user((void *)arg0, &p, sizeof(p))) return -EFAULT; @@ -154,16 +165,20 @@ return 0; case BRCTL_SET_BRIDGE_PRIORITY: + write_lock(&br->lock); br_stp_set_bridge_priority(br, arg0); + write_unlock(&br->lock); return 0; case BRCTL_SET_PORT_PRIORITY: { struct net_bridge_port *p; + write_lock(&br->lock); if ((p = br_get_port(br, arg0)) == NULL) return -EINVAL; br_stp_set_port_priority(p, arg1); + write_unlock(&br->lock); return 0; } @@ -171,9 +186,11 @@ { struct net_bridge_port *p; + write_lock(&br->lock); if ((p = br_get_port(br, arg0)) == NULL) return -EINVAL; br_stp_set_path_cost(p, arg1); + write_unlock(&br->lock); return 0; } @@ -230,11 +247,9 @@ return -EOPNOTSUPP; } -static DECLARE_MUTEX(ioctl_mutex); int br_ioctl_deviceless_stub(unsigned long arg) { - int err; unsigned long i[3]; if (!capable(CAP_NET_ADMIN)) @@ -243,11 +258,7 @@ if (copy_from_user(i, (void *)arg, 3*sizeof(unsigned long))) return -EFAULT; - down(&ioctl_mutex); - err = br_ioctl_deviceless(i[0], i[1], i[2]); - up(&ioctl_mutex); - - return err; + return br_ioctl_deviceless(i[0], i[1], i[2]); } int br_ioctl(struct net_bridge *br, unsigned int cmd, unsigned long arg0, unsigned long arg1, unsigned long arg2) @@ -257,18 +268,9 @@ if (!capable(CAP_NET_ADMIN)) return -EPERM; - down(&ioctl_mutex); err = br_ioctl_deviceless(cmd, arg0, arg1); if (err == -EOPNOTSUPP) err = br_ioctl_device(br, cmd, arg0, arg1, arg2); - up(&ioctl_mutex); return err; -} - -void br_call_ioctl_atomic(void (*fn)(void)) -{ - down(&ioctl_mutex); - fn(); - up(&ioctl_mutex); } diff -Nru a/net/bridge/br_notify.c b/net/bridge/br_notify.c --- a/net/bridge/br_notify.c Thu Mar 20 11:29:45 2003 +++ b/net/bridge/br_notify.c Thu Mar 20 11:29:45 2003 @@ -21,15 +21,14 @@ struct notifier_block br_device_notifier = { - br_device_event, - NULL, - 0 + .notifier_call = br_device_event }; static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev; struct net_bridge_port *p; + struct net_bridge *br; dev = ptr; p = dev->br_port; @@ -37,13 +36,15 @@ if (p == NULL) return NOTIFY_DONE; - switch (event) + br = p->br; + + switch (event) { case NETDEV_CHANGEADDR: - read_lock(&p->br->lock); + write_lock_bh(&br->lock); br_fdb_changeaddr(p, dev->dev_addr); - br_stp_recalculate_bridge_id(p->br); - read_unlock(&p->br->lock); + br_stp_recalculate_bridge_id(br); + write_unlock_bh(&br->lock); break; case NETDEV_GOING_DOWN: @@ -51,23 +52,23 @@ break; case NETDEV_DOWN: - if (p->br->dev.flags & IFF_UP) { - read_lock(&p->br->lock); - br_stp_disable_port(dev->br_port); - read_unlock(&p->br->lock); + if (br->dev.flags & IFF_UP) { + write_lock_bh(&br->lock); + br_stp_disable_port(p); + write_unlock_bh(&br->lock); } break; case NETDEV_UP: - if (p->br->dev.flags & IFF_UP) { - read_lock(&p->br->lock); - br_stp_enable_port(dev->br_port); - read_unlock(&p->br->lock); + if (!(br->dev.flags & IFF_UP)) { + write_lock_bh(&br->lock); + br_stp_enable_port(p); + write_unlock_bh(&br->lock); } break; case NETDEV_UNREGISTER: - br_del_if(dev->br_port->br, dev); + br_del_if(br, dev); break; } - : send the line "unsubscribe linux-net" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html