There is no good reason not to support userspace specifying the network namespace during device creation, and it is a handy thing to be able to do. We have to be a little extra careful in this case to ensure that the network namespace stays alive through the point where we call register_netdevice. In addition, we need to pass the network namespace to the rtnl_link_ops.newlink method so that we can properly create the new device in another namespace and have it be a vlan device of a device in our current network namespace. In summary, this patch makes "ip link add somename netns NNN type sometype" do the obvious thing instead of ignoring the network namespace parameter. Signed-off-by: Eric W. Biederman <ebiederm@xxxxxxxxxxxx> --- drivers/net/macvlan.c | 4 ++-- drivers/net/veth.c | 5 +++-- include/net/rtnetlink.h | 3 ++- net/8021q/vlan_netlink.c | 4 ++-- net/core/rtnetlink.c | 17 ++++++++++++++++- 5 files changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 4239450..fc5933b 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -416,7 +416,7 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int macvlan_newlink(struct net_device *dev, +static int macvlan_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct macvlan_dev *vlan = netdev_priv(dev); @@ -427,7 +427,7 @@ static int macvlan_newlink(struct net_device *dev, if (!tb[IFLA_LINK]) return -EINVAL; - lowerdev = __dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK])); + lowerdev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK])); if (lowerdev == NULL) return -ENODEV; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 31cd817..3a2d818 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -335,7 +335,7 @@ static int veth_validate(struct nlattr *tb[], struct nlattr *data[]) static struct rtnl_link_ops veth_link_ops; -static int veth_newlink(struct net_device *dev, 
+static int veth_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { int err; @@ -375,7 +375,7 @@ static int veth_newlink(struct net_device *dev, else snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); - peer = rtnl_create_link(dev_net(dev), ifname, &veth_link_ops, tbp); + peer = rtnl_create_link(net, ifname, &veth_link_ops, tbp); if (IS_ERR(peer)) return PTR_ERR(peer); @@ -383,6 +383,7 @@ static int veth_newlink(struct net_device *dev, random_ether_addr(peer->dev_addr); err = register_netdevice(peer); + put_net(peer->nd_net); if (err < 0) goto err_register_peer; diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 3c1895e..dbf546f 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -55,7 +55,8 @@ struct rtnl_link_ops { int (*validate)(struct nlattr *tb[], struct nlattr *data[]); - int (*newlink)(struct net_device *dev, + int (*newlink)(struct net *net, + struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]); int (*changelink)(struct net_device *dev, diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index e9c91dc..e6190f7 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -100,7 +100,7 @@ static int vlan_changelink(struct net_device *dev, return 0; } -static int vlan_newlink(struct net_device *dev, +static int vlan_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct vlan_dev_info *vlan = vlan_dev_info(dev); @@ -112,7 +112,7 @@ static int vlan_newlink(struct net_device *dev, if (!tb[IFLA_LINK]) return -EINVAL; - real_dev = __dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK])); + real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8862498..069b176 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1002,6 +1002,19 @@ struct net_device *rtnl_create_link(struct net *net, char 
*ifname, goto err_free; } + /* To support userspace specifying a network namespace during + * device creation we grab the network namespace here and hold + * it until just after register_netdevice to prevent races. + */ + if (!tb[IFLA_NET_NS_PID]) + get_net(net); + else { + net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); + if (IS_ERR(net)) { + err = PTR_ERR(net); + goto err_free; + } + } dev_net_set(dev, net); dev->rtnl_link_ops = ops; @@ -1150,10 +1163,12 @@ replay: if (IS_ERR(dev)) err = PTR_ERR(dev); else if (ops->newlink) - err = ops->newlink(dev, tb, data); + err = ops->newlink(net, dev, tb, data); else err = register_netdevice(dev); + if (!IS_ERR(dev)) + put_net(dev->nd_net); if (err < 0 && !IS_ERR(dev)) free_netdev(dev); return err; -- 1.5.3.rc6.17.g1911 _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers