[PATCH rdma-next v2 07/10] RDMA/core: Implement compat device/sysfs tree in net namespace

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Parav Pandit <parav@xxxxxxxxxxxx>

Implement a compatibility layer of ib_core sysfs entries so that net
namespaces other than init_net can also discover rdma devices.

Each non init_net net namespace has an ib_core_device created in it for
every rdma device. The sysfs tree of such an ib_core_device resembles
that of the rdma device found in the init_net namespace.
This allows rdma devices to be discovered in multiple non init_net
net namespaces via sysfs entries, which is helpful to rdma-core
userspace.

Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx>
Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx>
---
 drivers/infiniband/core/Makefile       |   2 +-
 drivers/infiniband/core/compat_sysfs.c | 183 +++++++++++++++++++++++++
 drivers/infiniband/core/core_priv.h    |  11 ++
 drivers/infiniband/core/device.c       |  30 +++-
 include/rdma/ib_verbs.h                |  16 +++
 5 files changed, 235 insertions(+), 7 deletions(-)
 create mode 100644 drivers/infiniband/core/compat_sysfs.c

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 867cee5e27b2..781b3172e596 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -12,7 +12,7 @@ ib_core-y :=			packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
 				device.o fmr_pool.o cache.o netlink.o \
 				roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
 				multicast.o mad.o smi.o agent.o mad_rmpp.o \
-				nldev.o restrack.o
+				nldev.o restrack.o compat_sysfs.o
 
 ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
 ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
diff --git a/drivers/infiniband/core/compat_sysfs.c b/drivers/infiniband/core/compat_sysfs.c
new file mode 100644
index 000000000000..6fa330678063
--- /dev/null
+++ b/drivers/infiniband/core/compat_sysfs.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <rdma/ib_verbs.h>
+#include "core_priv.h"
+
+/**
+ * struct ib_compat_device - rdma compat device of one net namespace
+ * @coredev:	IB core device registered in that net namespace
+ * @list:	entry in rdma_compatdev_net.compatdev_list
+ */
+struct ib_compat_device {
+	struct ib_core_device	coredev;
+	struct list_head	list;
+};
+
+struct rdma_compatdev_net {
+	/* List of compat devices of a net namespace (except for init_net).
+	 * It is used to destroy compat devices when either the parent rdma
+	 * device is removed, or when the net namespace itself is removed.
+	 */
+	struct list_head	compatdev_list;
+
+	/* Semaphore protecting compatdev_list of this net namespace during
+	 * device add/remove or during net namespace add/remove operations.
+	 */
+	struct rw_semaphore	compat_rwsem;
+};
+
+static unsigned int rdma_compatdev_net_id;	/* net_generic() id */
+
+/* ->release callback: @dev is embedded in a compat device's coredev. */
+static void rdma_compatdev_release(struct device *dev)
+{
+	struct ib_compat_device *cdev;
+
+	cdev = container_of(dev, struct ib_compat_device, coredev.dev);
+	/* Frees the whole compat device, not only the embedded @dev. */
+	kfree(cdev);
+}
+
+/* Best effort: mirror @device into @net as a compat device and list it. */
+static void rdma_compatdev_create(struct ib_device *device, struct net *net)
+{
+	struct rdma_compatdev_net *rdma_net =
+				net_generic(net, rdma_compatdev_net_id);
+	struct ib_compat_device *cdev;
+
+	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+	if (!cdev)
+		return;
+	cdev->coredev.dev.parent = device->dev.parent;
+	rdma_init_coredev(&cdev->coredev, device);
+	rdma_dev_net_set(&cdev->coredev, net);
+	cdev->coredev.dev.release = rdma_compatdev_release;
+	/* Past initialization, drop cdev with put_device(), never kfree():
+	 * only the final put frees the device name and calls ->release.
+	 */
+	if (dev_set_name(&cdev->coredev.dev, "%s", device->name) ||
+	    device_add(&cdev->coredev.dev)) {
+		put_device(&cdev->coredev.dev);
+		return;
+	}
+
+	down_write(&rdma_net->compat_rwsem);
+	list_add_tail(&cdev->list, &rdma_net->compatdev_list);
+	up_write(&rdma_net->compat_rwsem);
+}
+
+void rdma_compatdev_add(struct ib_device *device)
+{
+	struct net *net;
+
+	/* Create a compat device in every net namespace other than the one
+	 * @device is currently bound to. device_add and the list insertion
+	 * are done under net_rwsem because they need to synchronize with
+	 * the net namespace del operation.
+	 */
+	down_read(&net_rwsem);
+	for_each_net(net) {
+		/* The namespace @device is bound to needs no compat copy. */
+		if (net_eq(net, rdma_dev_net(&device->coredev)))
+			continue;
+
+		rdma_compatdev_create(device, net);
+	}
+	up_read(&net_rwsem);
+}
+
+static void remove_one_compatdev(struct ib_compat_device *cdev)
+{
+	list_del(&cdev->list);	/* unlink first: unregister may free cdev */
+	device_unregister(&cdev->coredev.dev);
+}
+
+void rdma_compatdev_remove(struct ib_device *device)
+{
+	struct ib_compat_device *cdev, *next;
+	struct rdma_compatdev_net *rdma_net;
+	struct net *net;
+
+	/* Hold net_rwsem while removing compat dev entries to synchronize
+	 * with _exit_net()/_init_net.
+	 */
+	down_read(&net_rwsem);
+	for_each_net(net) {
+		rdma_net = net_generic(net, rdma_compatdev_net_id);
+		down_write(&rdma_net->compat_rwsem);
+		list_for_each_entry_safe(cdev, next,
+					 &rdma_net->compatdev_list, list) {
+			if (rdma_device_to_ibdev(&cdev->coredev.dev) != device)
+				continue;
+			/* Found the compat device of @device in this net. */
+			remove_one_compatdev(cdev);
+			break;
+		}
+		up_write(&rdma_net->compat_rwsem);
+	}
+	up_read(&net_rwsem);
+}
+
+static __net_init int rdma_compatdev_init_net(struct net *net)
+{
+	struct rdma_compatdev_net *rdma_net =
+				net_generic(net, rdma_compatdev_net_id);
+	struct ib_device *device;
+
+	INIT_LIST_HEAD(&rdma_net->compatdev_list);
+	init_rwsem(&rdma_net->compat_rwsem);
+
+	/* No need to create any compat devices in init_net. */
+	if (net_eq(net, &init_net))
+		return 0;
+
+	/* Mirror every listed rdma device into the new net namespace. Hold
+	 * ib_device_mutex so ib_register_device() cannot add them as well.
+	 */
+	mutex_lock(&ib_device_mutex);
+	/* Also take ib_lists_rwsem for reading, so that walking
+	 * ib_device_list does not rely on ib_device_mutex alone.
+	 */
+	down_read(&ib_lists_rwsem);
+	list_for_each_entry(device, &ib_device_list, core_list)
+		rdma_compatdev_create(device, net);
+	up_read(&ib_lists_rwsem);
+	mutex_unlock(&ib_device_mutex);
+	return 0;	/* compat device creation is best effort */
+}
+
+static void rdma_compatdev_exit_net(struct net *net)
+{
+	struct rdma_compatdev_net *rdma_net;
+	struct ib_compat_device *cdev, *next;
+
+	rdma_net = net_generic(net, rdma_compatdev_net_id);
+	mutex_lock(&ib_device_mutex);	/* serialize vs. ib_register_device() */
+	down_write(&rdma_net->compat_rwsem);
+	list_for_each_entry_safe(cdev, next, &rdma_net->compatdev_list, list)
+		remove_one_compatdev(cdev);	/* unlinks and unregisters */
+	up_write(&rdma_net->compat_rwsem);
+	mutex_unlock(&ib_device_mutex);
+}
+
+static struct pernet_operations rdma_compat_net_ops = {
+	.init	= rdma_compatdev_init_net,	/* populate a new net */
+	.exit	= rdma_compatdev_exit_net,	/* drain a removed net */
+	.id	= &rdma_compatdev_net_id,	/* net_generic() id */
+	.size	= sizeof(struct rdma_compatdev_net),	/* per-net data */
+};
+
+int __init rdma_compat_dev_init(void)
+{
+	return register_pernet_device(&rdma_compat_net_ops); /* 0 or -errno */
+}
+
+void __exit rdma_compat_dev_cleanup(void)
+{
+	unregister_pernet_device(&rdma_compat_net_ops);	/* undoes _dev_init() */
+}
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 0da0a26c02b9..ea04926f6b0c 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -55,6 +55,9 @@ struct pkey_index_qp_list {
 };
 
 extern const struct attribute_group ib_dev_attr_group;
+extern struct list_head ib_device_list;
+extern struct mutex ib_device_mutex;
+extern struct rw_semaphore ib_lists_rwsem;
 
 int  ib_device_register_sysfs(struct ib_device *device,
 			      int (*port_callback)(struct ib_device *,
@@ -323,4 +326,12 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec,
 				 const struct ib_gid_attr *attr);
 
 struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
+
+void rdma_init_coredev(struct ib_core_device *coredev, struct ib_device *dev);
+
+int __init rdma_compat_dev_init(void);
+void __exit rdma_compat_dev_cleanup(void);
+
+void rdma_compatdev_add(struct ib_device *device);
+void rdma_compatdev_remove(struct ib_device *device);
 #endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index e8ae8699cb87..2d3939dc6d22 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -68,7 +68,7 @@ EXPORT_SYMBOL_GPL(ib_wq);
 /* The ib_device_list and client_list contain devices and clients after their
  * registration has completed, and the devices and clients are removed
  * during unregistration. */
-static LIST_HEAD(ib_device_list);
+LIST_HEAD(ib_device_list);
 static LIST_HEAD(client_list);
 
 /*
@@ -82,8 +82,8 @@ static LIST_HEAD(client_list);
  *
  * ib_lists_rwsem also protects access to the client data list.
  */
-static DEFINE_MUTEX(ib_device_mutex);
-static DECLARE_RWSEM(ib_lists_rwsem);
+DEFINE_MUTEX(ib_device_mutex);
+DECLARE_RWSEM(ib_lists_rwsem);
 
 static int ib_security_change(struct notifier_block *nb, unsigned long event,
 			      void *lsm_data);
@@ -267,7 +267,10 @@ static int ib_device_uevent(struct device *device,
 
 static const void *net_namespace(struct device *d)
 {
-	return &init_net;
+	struct ib_core_device *coredev =
+			container_of(d, struct ib_core_device, dev);
+
+	return rdma_dev_net(coredev);
 }
 
 static struct class ib_class = {
@@ -278,8 +281,7 @@ static struct class ib_class = {
 	.namespace	= net_namespace,
 };
 
-static void rdma_init_coredev(struct ib_core_device *coredev,
-			      struct ib_device *dev)
+void rdma_init_coredev(struct ib_core_device *coredev, struct ib_device *dev)
 {
 	/* This BUILD_BUG_ON is intended to catch layout change
 	 * of union of ib_core_device and device.
@@ -319,6 +321,7 @@ struct ib_device *ib_alloc_device(size_t size)
 
 	device->groups[0] = &ib_dev_attr_group;
 	rdma_init_coredev(&device->coredev, device);
+	rdma_dev_net_set(&device->coredev, &init_net);
 
 	INIT_LIST_HEAD(&device->event_handler_list);
 	spin_lock_init(&device->event_handler_lock);
@@ -642,6 +645,11 @@ int ib_register_device(struct ib_device *device, const char *name,
 		goto cg_cleanup;
 	}
 
+	/* Perform this under device mutex lock, so that it can synchronize
+	 * with _init_net() to avoid duplicate additions for a given device.
+	 */
+	rdma_compatdev_add(device);
+
 	device->reg_state = IB_DEV_REGISTERED;
 
 	list_for_each_entry(client, &client_list, list)
@@ -698,6 +706,7 @@ void ib_unregister_device(struct ib_device *device)
 	}
 	up_read(&ib_lists_rwsem);
 
+	rdma_compatdev_remove(device);
 	ib_device_unregister_sysfs(device);
 	ib_device_unregister_rdmacg(device);
 
@@ -1314,12 +1323,20 @@ static int __init ib_core_init(void)
 		goto err_sa;
 	}
 
+	ret = rdma_compat_dev_init();
+	if (ret) {
+		pr_warn("Couldn't init compat dev. ret %d\n", ret);
+		goto err_compat;
+	}
+
 	nldev_init();
 	rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
 	roce_gid_mgmt_init();
 
 	return 0;
 
+err_compat:
+	unregister_lsm_notifier(&ibdev_lsm_nb);
 err_sa:
 	ib_sa_cleanup();
 err_mad:
@@ -1344,6 +1361,7 @@ static void __exit ib_core_cleanup(void)
 	roce_gid_mgmt_cleanup();
 	nldev_exit();
 	rdma_nl_unregister(RDMA_NL_LS);
+	rdma_compat_dev_cleanup();
 	unregister_lsm_notifier(&ibdev_lsm_nb);
 	ib_sa_cleanup();
 	ib_mad_cleanup();
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index f9bedf28e292..6beb3a4f3c22 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2262,6 +2262,7 @@ struct ib_core_device {
 	 * union of ib_core_device and device exists in ib_device.
 	 */
 	struct device		dev;
+	possible_net_t		rdma_net;
 	struct kobject		*ports_kobj;
 	struct list_head	port_list;
 	struct ib_device	*owner;	/* reach back to owner ib_device */
@@ -2625,6 +2626,21 @@ struct ib_device {
 	struct completion unreg_completion;
 };
 
+/*
+ * Net namespace inlines: get/set the net namespace of an ib_core_device
+ */
+static inline
+struct net *rdma_dev_net(const struct ib_core_device *coredev)
+{
+	return read_pnet(&coredev->rdma_net);
+}
+
+static inline
+void rdma_dev_net_set(struct ib_core_device *coredev, struct net *net)
+{
+	write_pnet(&coredev->rdma_net, net);
+}
+
 struct ib_client {
 	char  *name;
 	void (*add)   (struct ib_device *);
-- 
2.19.1




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux