Eric W. Biederman wrote: > There is no good reason to not support userspace specifying the > network namespace during device creation and it seems a handy > thing to do. > > We have to be a little extra careful in this case to ensure that > the network namespace exists through the point where we call > register_netdevice. > > In addition we need to pass the network namespace to the > rtnl_link_ops.newlink method so we can properly create > the new device in another namespace and have it be a vlan > device of a device in our current network namespace. > > In summary this patch makes ip link add somename netns NNN type sometype > do the obvious thing instead of ignoring the network namespace parameter. > > Signed-off-by: Eric W. Biederman <ebiederm@xxxxxxxxxxxx> > --- > drivers/net/macvlan.c | 4 ++-- > drivers/net/veth.c | 5 +++-- > include/net/rtnetlink.h | 3 ++- > net/8021q/vlan_netlink.c | 4 ++-- > net/core/rtnetlink.c | 17 ++++++++++++++++- > 5 files changed, 25 insertions(+), 8 deletions(-) > > diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c > index 4239450..fc5933b 100644 > --- a/drivers/net/macvlan.c > +++ b/drivers/net/macvlan.c > @@ -416,7 +416,7 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[]) > return 0; > } > > -static int macvlan_newlink(struct net_device *dev, > +static int macvlan_newlink(struct net *net, struct net_device *dev, > struct nlattr *tb[], struct nlattr *data[]) > { > struct macvlan_dev *vlan = netdev_priv(dev); > @@ -427,7 +427,7 @@ static int macvlan_newlink(struct net_device *dev, > if (!tb[IFLA_LINK]) > return -EINVAL; > > - lowerdev = __dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK])); > + lowerdev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK])); > if (lowerdev == NULL) > return -ENODEV; > > diff --git a/drivers/net/veth.c b/drivers/net/veth.c > index 31cd817..3a2d818 100644 > --- a/drivers/net/veth.c > +++ b/drivers/net/veth.c > @@ -335,7 +335,7 @@ static int veth_validate(struct 
nlattr *tb[], struct nlattr *data[]) > > static struct rtnl_link_ops veth_link_ops; > > -static int veth_newlink(struct net_device *dev, > +static int veth_newlink(struct net *net, struct net_device *dev, > struct nlattr *tb[], struct nlattr *data[]) > { > int err; > @@ -375,7 +375,7 @@ static int veth_newlink(struct net_device *dev, > else > snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); > > - peer = rtnl_create_link(dev_net(dev), ifname, &veth_link_ops, tbp); > + peer = rtnl_create_link(net, ifname, &veth_link_ops, tbp); > if (IS_ERR(peer)) > return PTR_ERR(peer); > > @@ -383,6 +383,7 @@ static int veth_newlink(struct net_device *dev, > random_ether_addr(peer->dev_addr); > > err = register_netdevice(peer); > + put_net(peer->nd_net); > if (err < 0) > goto err_register_peer; > > diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h > index 3c1895e..dbf546f 100644 > --- a/include/net/rtnetlink.h > +++ b/include/net/rtnetlink.h > @@ -55,7 +55,8 @@ struct rtnl_link_ops { > int (*validate)(struct nlattr *tb[], > struct nlattr *data[]); > > - int (*newlink)(struct net_device *dev, > + int (*newlink)(struct net *net, > + struct net_device *dev, > struct nlattr *tb[], > struct nlattr *data[]); > int (*changelink)(struct net_device *dev, > diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c > index e9c91dc..e6190f7 100644 > --- a/net/8021q/vlan_netlink.c > +++ b/net/8021q/vlan_netlink.c > @@ -100,7 +100,7 @@ static int vlan_changelink(struct net_device *dev, > return 0; > } > > -static int vlan_newlink(struct net_device *dev, > +static int vlan_newlink(struct net *net, struct net_device *dev, > struct nlattr *tb[], struct nlattr *data[]) > { > struct vlan_dev_info *vlan = vlan_dev_info(dev); > @@ -112,7 +112,7 @@ static int vlan_newlink(struct net_device *dev, > > if (!tb[IFLA_LINK]) > return -EINVAL; > - real_dev = __dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK])); > + real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK])); Hmm, if 
the macvlan is created inside a namespace, the network namespace passed as the function parameter will not be the namespace to which the IFLA_LINK device belongs, and __dev_get_by_index will fail, no? > if (!real_dev) > return -ENODEV; > > diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c > index 8862498..069b176 100644 > --- a/net/core/rtnetlink.c > +++ b/net/core/rtnetlink.c > @@ -1002,6 +1002,19 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname, > goto err_free; > } > > + /* To support userspace specifying a network namespace during > + * device creation we grab the network namespace here and hold > + * it until just after register_netdevice to prevent races. > + */ > + if (!tb[IFLA_NET_NS_PID]) > + get_net(net); > + else { > + net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); > + if (IS_ERR(net)) { > + err = PTR_ERR(net); > + goto err_free; > + } > + } > dev_net_set(dev, net); > dev->rtnl_link_ops = ops; > > @@ -1150,10 +1163,12 @@ replay: > if (IS_ERR(dev)) > err = PTR_ERR(dev); > else if (ops->newlink) > - err = ops->newlink(dev, tb, data); > + err = ops->newlink(net, dev, tb, data); > else > err = register_netdevice(dev); > > + if (!IS_ERR(dev)) > + put_net(dev->nd_net); If there is an error in ops->newlink or register_netdevice, we will exit without releasing the net refcount. > if (err < 0 && !IS_ERR(dev)) > free_netdev(dev); > return err; _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers