[PATCH rdma-next v2 3/3] RDMA/core: Add netlink command to change net namespace of rdma device

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Parav Pandit <parav@xxxxxxxxxxxx>

Provide an option to change the net namespace of an rdma device through a
netlink command. When multiple rdma devices exist in a system and
containers are used, this limits an rdma device's visibility to the
specified net namespace.

An example command to change the net namespace of the mlx5_1 device to a
previously created net namespace 'foo' is shown below.

$ ip netns add foo
$ rdma dev set mlx5_1 netns foo

Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx>
---
 drivers/infiniband/core/core_priv.h |  2 ++
 drivers/infiniband/core/device.c    | 56 ++++++++++++++++++++++++++---
 drivers/infiniband/core/nldev.c     | 13 ++++++-
 include/uapi/rdma/rdma_netlink.h    |  6 +++-
 4 files changed, 70 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 5b0ffbb6b3c9..d4dd360769cb 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -350,4 +350,6 @@ int ib_port_register_module_stat(struct ib_device *device, u8 port_num,
 				 const char *name);
 void ib_port_unregister_module_stat(struct kobject *kobj);
 
+int ib_device_set_netns_put(struct sk_buff *skb,
+			    struct ib_device *dev, u32 ns_fd);
 #endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 7fe4f8b880ee..fcbf2d4c865d 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1452,9 +1452,9 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
 	mutex_lock(&device->unregistration_lock);
 
 	/*
-	 * If a device not under ib_device_get() or the unregistration_lock
-	 * the namespace can be changed, or it can be unregistered. Check
-	 * again under the lock.
+	 * If a device not under ib_device_get() or if the unregistration_lock
+	 * is not held, the namespace can be changed, or it can be unregistered.
+	 * Check again under the lock.
 	 */
 	if (refcount_read(&device->refcount) == 0 ||
 	    !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) {
@@ -1471,12 +1471,12 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
 	 */
 	write_pnet(&device->coredev.rdma_net, net);
 
+	down_read(&devices_rwsem);
 	/*
 	 * Currently rdma devices are system wide unique. So the device name
 	 * is guaranteed free in the new namespace. Publish the new namespace
 	 * at the sysfs level.
 	 */
-	down_read(&devices_rwsem);
 	ret = device_rename(&device->dev, dev_name(&device->dev));
 	up_read(&devices_rwsem);
 	if (ret) {
@@ -1488,7 +1488,7 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
 	}
 
 	ret2 = enable_device_and_get(device);
-	if (ret2)
+	if (ret2) {
 		/*
 		 * This shouldn't really happen, but if it does, let the user
 		 * retry at later point. So don't disable the device.
@@ -1496,7 +1496,9 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
 		dev_warn(&device->dev,
 			 "%s: Couldn't re-enable device after namespace change\n",
 			 __func__);
+	}
 	kobject_uevent(&device->dev.kobj, KOBJ_ADD);
+
 	ib_device_put(device);
 out:
 	mutex_unlock(&device->unregistration_lock);
@@ -1505,6 +1507,50 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
 	return ret2;
 }
 
+int ib_device_set_netns_put(struct sk_buff *skb,
+			    struct ib_device *dev, u32 ns_fd)
+{
+	struct net *net;
+	int ret;
+
+	net = get_net_ns_by_fd(ns_fd);
+	if (IS_ERR(net)) {
+		ret = PTR_ERR(net);
+		goto net_err;
+	}
+
+	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+		ret = -EPERM;
+		goto ns_err;
+	}
+
+	/*
+	 * Currently supported only for those providers which support
+	 * disassociation and don't do port specific sysfs init. Once a
+	 * port_cleanup infrastructure is implemented, this limitation will be
+	 * removed.
+	 */
+	if (!dev->ops.disassociate_ucontext || dev->ops.init_port ||
+	    ib_devices_shared_netns) {
+		ret = -EOPNOTSUPP;
+		goto ns_err;
+	}
+
+	get_device(&dev->dev);
+	ib_device_put(dev);
+	ret = rdma_dev_change_netns(dev, current->nsproxy->net_ns, net);
+	put_device(&dev->dev);
+
+	put_net(net);
+	return ret;
+
+ns_err:
+	put_net(net);
+net_err:
+	ib_device_put(dev);
+	return ret;
+}
+
 static struct pernet_operations rdma_dev_net_ops = {
 	.init = rdma_dev_init_net,
 	.exit = rdma_dev_exit_net,
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 8cb3851d212e..bced945a456d 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -119,6 +119,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
 	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
 	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
 				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
+	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
 };
 
 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -691,9 +692,20 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 		nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
 			    IB_DEVICE_NAME_MAX);
 		err = ib_device_rename(device, name);
+		goto done;
 	}
 
+	if (tb[RDMA_NLDEV_NET_NS_FD]) {
+		u32 ns_fd;
+
+		ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
+		err = ib_device_set_netns_put(skb, device, ns_fd);
+		goto put_done;
+	}
+
+done:
 	ib_device_put(device);
+put_done:
 	return err;
 }
 
@@ -909,7 +921,6 @@ static int _nldev_res_get_dumpit(struct ib_device *device,
 		nlmsg_cancel(skb, nlh);
 		goto out;
 	}
-
 	nlmsg_end(skb, nlh);
 
 	idx++;
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index d49f491341f6..42a8bdc40a14 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -469,12 +469,16 @@ enum rdma_nldev_attr {
 	 * either shared or exclusive among multiple net namespaces.
 	 */
 	RDMA_NLDEV_SYS_ATTR_NETNS_MODE,		/* u8 */
-
 	/*
 	 * Device protocol, e.g. ib, iw, usnic, roce and opa
 	 */
 	RDMA_NLDEV_ATTR_DEV_PROTOCOL,		/* string */
 
+	/*
+	 * File descriptor handle of the net namespace object
+	 */
+	RDMA_NLDEV_NET_NS_FD,			/* u32 */
+
 	/*
 	 * Always the end
 	 */
-- 
2.19.2




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux