[PATCH rdma-next 5/5] RDMA/core: Add command to set ib_core device net namespace sharing mode

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Parav Pandit <parav@xxxxxxxxxxxx>

Add a netlink command that enables/disables sharing of rdma devices
among multiple net namespaces.

Using rdma tool,
$rdma sys set netns shared (default mode)

When rdma subsystem netns mode is set to shared mode, rdma devices
will be accessible in all net namespaces.

Using rdma tool,
$rdma sys set netns exclusive

When rdma subsystem netns mode is set to exclusive mode, devices
will be accessible in only one net namespace at any given
point of time.
Any rdma resources created or in use before the netns mode is set to
exclusive will remain in shared mode; in other words, changing the
netns mode to exclusive or shared has no effect on already open
devices.

To change this mode, a netlink command is used instead of sysctl,
because a netlink command allows the module to be auto-loaded.

Also add a module parameter to change the sharing mode of ib_core early
in the boot process. This helps those systems where a modern, up-to-date
rdma tool (iproute2) package may not be available during the kernel
upgrade cycle.

Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx>
Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx>
---
 drivers/infiniband/core/core_priv.h |  2 +
 drivers/infiniband/core/device.c    | 75 +++++++++++++++++++++++++++++
 drivers/infiniband/core/nldev.c     | 25 ++++++++++
 include/uapi/rdma/rdma_netlink.h    |  3 +-
 4 files changed, 103 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 8c2229182cfd..ed5edf2e5256 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -342,4 +342,6 @@ void rdma_init_coredev(struct ib_core_device *coredev, struct ib_device *dev,
 void ib_free_port_attrs(struct ib_core_device *coredev);
 int ib_setup_port_attrs(struct ib_core_device *coredev,
 			bool alloc_hw_stats);
+
+int rdma_compatdev_set(u8 enable);
 #endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index ede4b2975fa8..2d8259e39b7b 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -711,6 +711,7 @@ static int add_one_compat_dev(struct ib_device *device,
 	struct ib_compat_device *cdev;
 	int ret;
 
+	lockdep_assert_held(&rdma_nets_rwsem);
 	if (!ib_devices_shared_netns)
 		return 0;
 
@@ -809,6 +810,75 @@ static int add_compat_devs(struct ib_device *device)
 	return ret;
 }
 
+static void remove_all_compat_devs(void)
+{
+	struct ib_compat_device *cdev;
+	struct ib_device *dev;
+	unsigned long index;
+
+	down_read(&devices_rwsem);
+	xa_for_each (&devices, index, dev) {
+		unsigned long c_index = 0;
+
+		/* Hold nets_rwsem for read while removing this device's
+		 * compat devices; rdma_compatdev_set() takes it for write.
+		 */
+		down_read(&rdma_nets_rwsem);
+		xa_for_each (&dev->compat_devs, c_index, cdev)
+			remove_one_compat_dev(dev, c_index);
+		up_read(&rdma_nets_rwsem);
+	}
+	up_read(&devices_rwsem);
+}
+
+static int add_all_compat_devs(void)
+{
+	struct rdma_dev_net *rnet;
+	struct ib_device *dev;
+	unsigned long index;
+	int ret = 0;
+
+	down_read(&devices_rwsem);
+	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
+		unsigned long net_index = 0;
+
+		/* Hold nets_rwsem to sync with sharing mode changes */
+		down_read(&rdma_nets_rwsem);
+		xa_for_each_marked (&rdma_nets, net_index,
+				    rnet, NET_NS_REGISTERED) {
+			ret = add_one_compat_dev(dev, rnet);
+			if (ret)
+				break;
+		}
+		up_read(&rdma_nets_rwsem);
+		if (ret)
+			break; /* don't let the next device overwrite ret */
+	}
+	up_read(&devices_rwsem);
+	if (ret)
+		remove_all_compat_devs();
+	return ret;
+}
+
+int rdma_compatdev_set(u8 enable)
+{
+	int ret = 0;
+
+	down_write(&rdma_nets_rwsem);
+	if (ib_devices_shared_netns == enable) {
+		up_write(&rdma_nets_rwsem);
+		return 0;
+	}
+	ib_devices_shared_netns = enable;
+	up_write(&rdma_nets_rwsem);
+	/* Helpers below take rdma_nets_rwsem (read) themselves */
+	if (enable)
+		ret = add_all_compat_devs();
+	else
+		remove_all_compat_devs();
+	return ret;
+}
+
 static void rdma_dev_exit_net(struct net *net)
 {
 	struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);
@@ -871,7 +941,12 @@ static __net_init int rdma_dev_init_net(struct net *net)
 	 */
 	down_read(&devices_rwsem);
 	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
+		/* Hold nets_rwsem so that netlink command cannot change
+		 * system configuration for device sharing mode.
+		 */
+		down_read(&rdma_nets_rwsem);
 		ret = add_one_compat_dev(dev, rnet);
+		up_read(&rdma_nets_rwsem);
 		if (ret)
 			break;
 	}
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index b92ac221a1e1..37605c891bd9 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -1241,6 +1241,27 @@ static int nldev_get_sys_get_dumpit(struct sk_buff *skb,
 	return skb->len;
 }
 
+static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+				  struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	u8 enable;
+	int err;
+
+	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, extack);
+	if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
+		return -EINVAL;
+
+	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
+	/* Only 0 (exclusive) and 1 (shared) are supported */
+	if (enable > 1)
+		return -EINVAL;
+
+	err = rdma_compatdev_set(enable);
+	return err;
+}
+
 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 	[RDMA_NLDEV_CMD_GET] = {
 		.doit = nldev_get_doit,
@@ -1281,6 +1302,10 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 	[RDMA_NLDEV_CMD_SYS_GET] = {
 		.dump = nldev_get_sys_get_dumpit,
 	},
+	[RDMA_NLDEV_CMD_SYS_SET] = {
+		.doit = nldev_set_sys_set_doit,
+		.flags = RDMA_NL_ADMIN_PERM,
+	},
 };
 
 void __init nldev_init(void)
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 4feccce049c3..f9bf5f13b7c8 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -256,8 +256,7 @@ enum rdma_nldev_command {
 	RDMA_NLDEV_CMD_SET,
 
 	RDMA_NLDEV_CMD_SYS_GET, /* can dump */
-
-	/* 4 is free to use */
+	RDMA_NLDEV_CMD_SYS_SET,
 
 	RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */
 
-- 
2.19.1




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux