From: Parav Pandit <parav@xxxxxxxxxxxx> Provide an option to change net namespace of rdma device through netlink command. When multiple rdma devices exists in a system, and when containers are used, this will limit rdma device visibility in specified net namespace. An example command to change net namespace of mlx5_1 device to previously created net namespace 'foo' would be below. $ ip netns add foo $ rdma dev set mlx5_1 netns foo Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx> --- drivers/infiniband/core/core_priv.h | 2 ++ drivers/infiniband/core/device.c | 56 ++++++++++++++++++++++++++--- drivers/infiniband/core/nldev.c | 13 ++++++- include/uapi/rdma/rdma_netlink.h | 6 +++- 4 files changed, 70 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 5b0ffbb6b3c9..d4dd360769cb 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -350,4 +350,6 @@ int ib_port_register_module_stat(struct ib_device *device, u8 port_num, const char *name); void ib_port_unregister_module_stat(struct kobject *kobj); +int ib_device_set_netns_put(struct sk_buff *skb, + struct ib_device *dev, u32 ns_fd); #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 7fe4f8b880ee..fcbf2d4c865d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1452,9 +1452,9 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, mutex_lock(&device->unregistration_lock); /* - * If a device not under ib_device_get() or the unregistration_lock - * the namespace can be changed, or it can be unregistered. Check - * again under the lock. + * If a device not under ib_device_get() or if the unregistration_lock + * is not held, the namespace can be changed, or it can be unregistered. + * Check again under the lock. */ if (refcount_read(&device->refcount) == 0 || !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) { @@ -1471,12 +1471,12 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, */ write_pnet(&device->coredev.rdma_net, net); + down_read(&devices_rwsem); /* * Currently rdma devices are system wide unique. So the device name * is guaranteed free in the new namespace. Publish the new namespace * at the sysfs level. */ - down_read(&devices_rwsem); ret = device_rename(&device->dev, dev_name(&device->dev)); up_read(&devices_rwsem); if (ret) { @@ -1488,7 +1488,7 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, } ret2 = enable_device_and_get(device); - if (ret2) + if (ret2) { /* * This shouldn't really happen, but if it does, let the user * retry at later point. So don't disable the device. @@ -1496,7 +1496,9 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, dev_warn(&device->dev, "%s: Couldn't re-enable device after namespace change\n", __func__); + } kobject_uevent(&device->dev.kobj, KOBJ_ADD); + ib_device_put(device); out: mutex_unlock(&device->unregistration_lock); @@ -1505,6 +1507,50 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, return ret2; } +int ib_device_set_netns_put(struct sk_buff *skb, + struct ib_device *dev, u32 ns_fd) +{ + struct net *net; + int ret; + + net = get_net_ns_by_fd(ns_fd); + if (IS_ERR(net)) { + ret = PTR_ERR(net); + goto net_err; + } + + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + ret = -EPERM; + goto ns_err; + } + + /* + * Currently supported only for those providers which support + * disassociation and don't do port specific sysfs init. Once a + * port_cleanup infrastructure is implemented, this limitation will be + * removed. + */ + if (!dev->ops.disassociate_ucontext || dev->ops.init_port || + ib_devices_shared_netns) { + ret = -EOPNOTSUPP; + goto ns_err; + } + + get_device(&dev->dev); + ib_device_put(dev); + ret = rdma_dev_change_netns(dev, current->nsproxy->net_ns, net); + put_device(&dev->dev); + + put_net(net); + return ret; + +ns_err: + put_net(net); +net_err: + ib_device_put(dev); + return ret; +} + static struct pernet_operations rdma_dev_net_ops = { .init = rdma_dev_init_net, .exit = rdma_dev_exit_net, diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 8cb3851d212e..bced945a456d 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -119,6 +119,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING, .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, + [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -691,9 +692,20 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], IB_DEVICE_NAME_MAX); err = ib_device_rename(device, name); + goto done; } + if (tb[RDMA_NLDEV_NET_NS_FD]) { + u32 ns_fd; + + ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]); + err = ib_device_set_netns_put(skb, device, ns_fd); + goto put_done; + } + +done: ib_device_put(device); +put_done: return err; } @@ -909,7 +921,6 @@ static int _nldev_res_get_dumpit(struct ib_device *device, nlmsg_cancel(skb, nlh); goto out; } - nlmsg_end(skb, nlh); idx++; diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index d49f491341f6..42a8bdc40a14 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -469,12 +469,16 @@ enum rdma_nldev_attr { * either shared or exclusive among multiple net namespaces. */ RDMA_NLDEV_SYS_ATTR_NETNS_MODE, /* u8 */ - /* * Device protocol, e.g. ib, iw, usnic, roce and opa */ RDMA_NLDEV_ATTR_DEV_PROTOCOL, /* string */ + /* + * File descriptor handle of the net namespace object + */ + RDMA_NLDEV_NET_NS_FD, /* u32 */ + /* * Always the end */ -- 2.19.2