2019-09-28, 16:48:32 +0000, Taehee Yoo wrote:
> @@ -6790,23 +6878,45 @@ int netdev_walk_all_lower_dev(struct net_device *dev,
>  					void *data),
>  			      void *data)
>  {
> -	struct net_device *ldev;
> -	struct list_head *iter;
> -	int ret;
> +	struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
> +	struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
> +	int ret, cur = 0;
>  
> -	for (iter = &dev->adj_list.lower,
> -	     ldev = netdev_next_lower_dev(dev, &iter);
> -	     ldev;
> -	     ldev = netdev_next_lower_dev(dev, &iter)) {
> -		/* first is the lower device itself */
> -		ret = fn(ldev, data);
> -		if (ret)
> -			return ret;
> +	now = dev;
> +	iter = &dev->adj_list.lower;
>  
> -		/* then look at all of its lower devices */
> -		ret = netdev_walk_all_lower_dev(ldev, fn, data);
> -		if (ret)
> -			return ret;
> +	while (1) {
> +		if (now != dev) {
> +			ret = fn(now, data);
> +			if (ret)
> +				return ret;
> +		}
> +
> +		next = NULL;
> +		while (1) {
> +			ldev = netdev_next_lower_dev(now, &iter);
> +			if (!ldev)
> +				break;
> +
> +			if (!next) {
> +				next = ldev;
> +				niter = &ldev->adj_list.lower;
> +			} else {
> +				dev_stack[cur] = ldev;
> +				iter_stack[cur++] = &ldev->adj_list.lower;
> +				break;
> +			}
> +		}
> +
> +		if (!next) {
> +			if (!cur)
> +				return 0;

Hmm, I don't think this condition is correct.

If we have this topology:

               bridge0
              /   |   \
             /    |    \
            /     |     \
      dummy0    vlan1    vlan2
                  |         \
                dummy1      dummy2

We end up with the expected lower/upper levels for all devices:

    | device  | upper | lower |
    |---------+-------+-------|
    | dummy0  |     2 |     1 |
    | dummy1  |     3 |     1 |
    | dummy2  |     3 |     1 |
    | vlan1   |     2 |     2 |
    | vlan2   |     2 |     2 |
    | bridge0 |     1 |     3 |

If we then add macvlan0 on top of bridge0:

               macvlan0
                  |
                  |
               bridge0
              /   |   \
             /    |    \
            /     |     \
      dummy0    vlan1    vlan2
                  |         \
                dummy1      dummy2

we can observe that __netdev_update_upper_level is only called for
some of the devices under bridge0.
I added a perf probe:

    # perf probe -a '__netdev_update_upper_level dev->name:string'

which gets hit for bridge0 (called directly by
__netdev_upper_dev_link) and then dummy0, vlan1, dummy1. It is never
called for vlan2 and dummy2.

After this, we have the following levels (*):

    | device   | upper | lower |
    |----------+-------+-------|
    | dummy0   |     3 |     1 |
    | dummy1   |     4 |     1 |
    | dummy2   |     3 |     1 |
    | vlan1    |     3 |     2 |
    | vlan2    |     2 |     2 |
    | bridge0  |     2 |     3 |
    | macvlan0 |     1 |     4 |

For dummy0, dummy1, vlan1, the upper level has increased by 1, as
expected. For dummy2 and vlan2, it's still the same, which is wrong.

(*) observed easily by adding another probe:

    # perf probe -a 'dev_get_stats dev->name:string dev->upper_level dev->lower_level'

and running "ip link"

Or you can just add prints and recompile, of course :)

> +			next = dev_stack[--cur];
> +			niter = iter_stack[cur];
> +		}
> +
> +		now = next;
> +		iter = niter;
>  	}
>  
>  	return 0;

-- 
Sabrina