[PATCH V3 net-next 04/15] smc: introduce SMC as an IB-client

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



* register SMC as an IB client and create a list of SMC IB-devices
  (port attributes are remembered for IB-devices mentioned in the PNET table)
* determine the RoCE device and port that belong to the interface used by
  the internal TCP socket, according to the PNET table definitions

Signed-off-by: Ursula Braun <ubraun@xxxxxxxxxxxxxxxxxx>
---
 net/smc/Makefile   |   2 +-
 net/smc/af_smc.c   |  10 ++++
 net/smc/smc.h      |   4 ++
 net/smc/smc_ib.c   | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_ib.h   |  40 ++++++++++++++
 net/smc/smc_pnet.c |  98 +++++++++++++++++++++++++++++++++
 net/smc/smc_pnet.h |   8 +++
 7 files changed, 318 insertions(+), 1 deletion(-)
 create mode 100644 net/smc/smc_ib.c
 create mode 100644 net/smc/smc_ib.h

diff --git a/net/smc/Makefile b/net/smc/Makefile
index 64dab53..50f39ff 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -1,2 +1,2 @@
 obj-$(CONFIG_SMC)	+= smc.o
-smc-y := af_smc.o smc_pnet.o
+smc-y := af_smc.o smc_pnet.o smc_ib.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index a58d613..bb80e3a 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -20,6 +20,7 @@
 #include <net/sock.h>
 
 #include "smc.h"
+#include "smc_ib.h"
 #include "smc_pnet.h"
 
 static void smc_set_keepalive(struct sock *sk, int val)
@@ -604,8 +605,16 @@ static int __init smc_init(void)
 		goto out_proto;
 	}
 
+	rc = smc_ib_register_client();
+	if (rc) {
+		pr_err("%s: ib_register fails with %d\n", __func__, rc);
+		goto out_sock;
+	}
+
 	return 0;
 
+out_sock:
+	sock_unregister(PF_SMC);
 out_proto:
 	proto_unregister(&smc_proto);
 out_pnet:
@@ -615,6 +624,7 @@ static int __init smc_init(void)
 
 static void __exit smc_exit(void)
 {
+	smc_ib_unregister_client();
 	sock_unregister(PF_SMC);
 	proto_unregister(&smc_proto);
 	smc_pnet_exit();
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 508f639..7e6b5b4 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -34,4 +34,8 @@ static inline struct smc_sock *smc_sk(const struct sock *sk)
 	return (struct smc_sock *)sk;
 }
 
+#define SMC_SYSTEMID_LEN		8
+
+extern u8	local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */
+
 #endif	/* __SMC_H */
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
new file mode 100644
index 0000000..8b6bb50
--- /dev/null
+++ b/net/smc/smc_ib.c
@@ -0,0 +1,180 @@
+/*
+ *  Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ *  IB infrastructure:
+ *  Establish SMC-R as an Infiniband Client to be notified about added and
+ *  removed IB devices of type RDMA.
+ *  Determine device and port characteristics for these IB devices.
+ *
+ *  Copyright IBM Corp. 2016
+ *
+ *  Author(s):  Ursula Braun <ubraun@xxxxxxxxxxxxxxxxxx>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/random.h>
+#include <rdma/ib_verbs.h>
+
+#include "smc_pnet.h"
+#include "smc_ib.h"
+#include "smc.h"
+
+struct smc_ib_devices smc_ib_devices = {	/* smc-registered ib devices */
+	.lock = __SPIN_LOCK_UNLOCKED(smc_ib_devices.lock),
+	.list = LIST_HEAD_INIT(smc_ib_devices.list),
+};
+
+/* initial content of local_systemid, i.e. "no identifier created yet" */
+#define SMC_LOCAL_SYSTEMID_RESET	"%%%%%%%"
+
+u8 local_systemid[SMC_SYSTEMID_LEN] = SMC_LOCAL_SYSTEMID_RESET;	/* unique system
+								 * identifier
+								 */
+
+/* Remember gid 0 and the MAC address of port @ibport (1-based) in the
+ * per-port arrays of @smcibdev.
+ * Returns the return code of ib_query_gid().
+ */
+static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
+{
+	struct net_device *ndev = NULL;
+	int rc;
+
+	rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
+			  &smcibdev->gid[ibport - 1], NULL);
+	/* the SMC protocol requires specification of the roce MAC address;
+	 * if net_device cannot be determined, it can be derived from gid 0
+	 */
+	if (smcibdev->ibdev->get_netdev)	/* callback may be unset */
+		ndev = smcibdev->ibdev->get_netdev(smcibdev->ibdev, ibport);
+	if (ndev) {
+		memcpy(&smcibdev->mac[ibport - 1], ndev->dev_addr, ETH_ALEN);
+		dev_put(ndev);	/* get_netdev() hands out a held reference */
+	} else if (!rc) {
+		memcpy(&smcibdev->mac[ibport - 1][0],
+		       &smcibdev->gid[ibport - 1].raw[8], 3);
+		memcpy(&smcibdev->mac[ibport - 1][3],
+		       &smcibdev->gid[ibport - 1].raw[13], 3);
+		smcibdev->mac[ibport - 1][0] &= ~0x02;
+	}
+	return rc;
+}
+
+/* Create an identifier unique for this instance of SMC-R.
+ * The MAC-address of the first active registered IB device
+ * plus a random 2-byte number is used to create this identifier.
+ * This name is delivered to the peer during connection initialization.
+ */
+static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev,
+						u8 ibport)
+{
+	memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1],
+	       sizeof(smcibdev->mac[ibport - 1]));
+	get_random_bytes(&local_systemid[0], 2);
+}
+
+/* Returns true if the remembered state of port @ibport is IB_PORT_ACTIVE. */
+bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
+{
+	return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
+}
+
+/* Query port attributes, gid 0 and MAC address of port @ibport (1-based)
+ * and store them in @smcibdev. If local_systemid still holds its reset
+ * value and the port is active, the system identifier is created as well.
+ * Returns 0 on success, otherwise the return code of the failing query.
+ */
+int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
+{
+	int rc;
+
+	memset(&smcibdev->pattr[ibport - 1], 0,
+	       sizeof(smcibdev->pattr[ibport - 1]));
+	rc = ib_query_port(smcibdev->ibdev, ibport,
+			   &smcibdev->pattr[ibport - 1]);
+	if (rc)
+		return rc;
+	rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
+	if (rc)
+		return rc;
+	/* local_systemid is a byte array, so compare it bytewise */
+	if (!memcmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET,
+		    sizeof(local_systemid)) &&
+	    smc_ib_port_active(smcibdev, ibport))
+		/* create unique system identifier */
+		smc_ib_define_local_systemid(smcibdev, ibport);
+	return 0;
+}
+
+static struct ib_client smc_ib_client;
+
+/* callback function for ib_register_client():
+ * allocate an smc_ib_device for a new IB device of type RDMA_NODE_IB_CA,
+ * remember the attributes of its first port found in the pnet table and
+ * add it to the list of smc ib devices
+ */
+static void smc_ib_add_dev(struct ib_device *ibdev)
+{
+	struct smc_ib_device *smcibdev;
+	int i;
+
+	if (ibdev->node_type != RDMA_NODE_IB_CA)
+		return;
+
+	smcibdev = kzalloc(sizeof(*smcibdev), GFP_KERNEL);
+	if (!smcibdev)
+		return;
+
+	smcibdev->ibdev = ibdev;
+
+	for (i = 1; i <= SMC_MAX_PORTS; i++) {
+		if (!smc_pnet_exists_in_table(smcibdev, i))
+			continue;
+		/* dev hotplug: ib device and port is in pnet table */
+		if (smc_ib_remember_port_attr(smcibdev, i)) {
+			kfree(smcibdev);
+			return;
+		}
+		smcibdev->initialized = 1;
+		break;
+	}
+	spin_lock(&smc_ib_devices.lock);
+	list_add_tail(&smcibdev->list, &smc_ib_devices.list);
+	spin_unlock(&smc_ib_devices.lock);
+	ib_set_client_data(ibdev, &smc_ib_client, smcibdev);
+}
+
+/* callback function for ib_register_client():
+ * remove the smc_ib_device belonging to @ibdev from the list and free it
+ */
+static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
+{
+	struct smc_ib_device *smcibdev;
+
+	smcibdev = ib_get_client_data(ibdev, &smc_ib_client);
+	if (!smcibdev)	/* smc_ib_add_dev() set no client data for ibdev */
+		return;
+	ib_set_client_data(ibdev, &smc_ib_client, NULL);
+	spin_lock(&smc_ib_devices.lock);
+	list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
+	spin_unlock(&smc_ib_devices.lock);
+	kfree(smcibdev);
+}
+
+static struct ib_client smc_ib_client = {
+	.name	= "smc_ib",
+	.add	= smc_ib_add_dev,
+	.remove = smc_ib_remove_dev,
+};
+
+/* Register SMC as an IB client; returns the ib_register_client() result. */
+int __init smc_ib_register_client(void)
+{
+	return ib_register_client(&smc_ib_client);
+}
+
+/* Unregister the SMC IB client. */
+void smc_ib_unregister_client(void)
+{
+	ib_unregister_client(&smc_ib_client);
+}
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
new file mode 100644
index 0000000..63613e7
--- /dev/null
+++ b/net/smc/smc_ib.h
@@ -0,0 +1,43 @@
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ *  Definitions for IB environment
+ *
+ *  Copyright IBM Corp. 2016
+ *
+ *  Author(s):  Ursula Braun <ubraun@xxxxxxxxxxxxxxxxxx>
+ */
+
+#ifndef _SMC_IB_H
+#define _SMC_IB_H
+
+#include <linux/if_ether.h>
+#include <rdma/ib_verbs.h>
+
+#define SMC_MAX_PORTS			2	/* Max # of ports */
+#define SMC_GID_SIZE			sizeof(union ib_gid)
+
+struct smc_ib_devices {			/* list of smc ib devices definition */
+	struct list_head	list;
+	spinlock_t		lock;	/* protects list of smc ib devices */
+};
+
+extern struct smc_ib_devices	smc_ib_devices; /* list of smc ib devices */
+
+/* Per-port arrays are indexed with (IB port number - 1). */
+struct smc_ib_device {				/* ib-device infos for smc */
+	struct list_head	list;
+	struct ib_device	*ibdev;
+	struct ib_port_attr	pattr[SMC_MAX_PORTS];	/* ib dev. port attrs */
+	char			mac[SMC_MAX_PORTS][ETH_ALEN];
+						/* mac address per port*/
+	union ib_gid		gid[SMC_MAX_PORTS]; /* gid per port */
+	u8			initialized : 1; /* port attrs remembered */
+};
+
+int smc_ib_register_client(void) __init;
+void smc_ib_unregister_client(void);
+bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
+int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport);
+
+#endif
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 4512a87..e007137 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -18,6 +18,7 @@
 
 #include <rdma/ib_verbs.h>
 
+#include "smc_ib.h"
 #include "smc_pnet.h"
 
 #define SMC_MAX_PNET_ID_LEN	16	/* Max. length of PNET id */
@@ -185,6 +186,8 @@ static bool smc_pnet_same_ibname(struct smc_pnetentry *a, char *name, u8 ibport)
 static int smc_pnet_add_ib(struct smc_pnetentry *pnetelem, char *name,
 			   u8 ibport)
 {
+	struct smc_ib_device *smcibdev = NULL;
+	struct smc_ib_device *dev;
 	struct smc_pnetentry *p;
 	int rc = -EEXIST;
 
@@ -196,10 +199,32 @@ static int smc_pnet_add_ib(struct smc_pnetentry *pnetelem, char *name,
 	if (pnetelem->ib_name[0] == '\0') {
 		strncpy(pnetelem->ib_name, name, sizeof(pnetelem->ib_name));
 		pnetelem->ib_port = ibport;
+		spin_lock(&smc_ib_devices.lock);
+		/* using string ib_name, search smcibdev in global list */
+		list_for_each_entry(dev, &smc_ib_devices.list, list) {
+			if (!strncmp(dev->ibdev->name, pnetelem->ib_name,
+				     sizeof(pnetelem->ib_name))) {
+				smcibdev = dev;
+				break;
+			}
+		}
+		spin_unlock(&smc_ib_devices.lock);
 		rc = 0;
 	}
 out:
 	write_unlock(&smc_pnettable.lock);
+	if (smcibdev && !smcibdev->initialized) {
+		/* ib dev already existed [dev coldplug].
+		 * Complements: smc_ib_add_dev() [dev hotplug],
+		 * smc_ib_global_event_handler() [port hotplug].
+		 * Function call chain can sleep so outside of our locks.
+		 */
+		rc = smc_ib_remember_port_attr(smcibdev,
+					       pnetelem->ib_port);
+		if (rc)
+			return rc;
+		smcibdev->initialized = 1;
+	}
 	return rc;
 }
 
@@ -508,3 +533,80 @@ int __init smc_pnet_init(void)
 bad0:
 	return rc;
 }
+
+/* Look up the IB device named in the given pnet table entry.
+ * On return *smcibdev and *ibport describe that device and the entry's port
+ * if the port is active; otherwise they are NULL and 0.
+ * This function is called under smc_pnettable.lock.
+ */
+static void smc_pnet_ib_dev_by_pnet(struct smc_pnetentry *pnetelem,
+				    struct smc_ib_device **smcibdev, u8 *ibport)
+{
+	struct smc_ib_device *cur;
+
+	*smcibdev = NULL;
+	*ibport = 0;
+	spin_lock(&smc_ib_devices.lock);
+	/* walk the global list and match on the ib device name */
+	list_for_each_entry(cur, &smc_ib_devices.list, list) {
+		if (strncmp(cur->ibdev->name, pnetelem->ib_name,
+			    sizeof(pnetelem->ib_name)))
+			continue;
+		if (!smc_ib_port_active(cur, pnetelem->ib_port))
+			continue;
+		*smcibdev = cur;
+		*ibport = pnetelem->ib_port;
+		break;
+	}
+	spin_unlock(&smc_ib_devices.lock);
+}
+
+/* PNET table analysis for a given sock:
+ * find the pnet entry whose interface name equals the name of the device
+ * of the sock's dst entry and return the active IB device and port of that
+ * entry; *smcibdev is NULL and *ibport is 0 if there is none.
+ */
+void smc_pnet_find_roce_resource(struct sock *sk,
+				 struct smc_ib_device **smcibdev, u8 *ibport)
+{
+	struct dst_entry *dst = sk_dst_get(sk);
+	struct smc_pnetentry *entry;
+
+	*smcibdev = NULL;
+	*ibport = 0;
+
+	if (!dst)
+		return;
+	if (dst->dev) {
+		read_lock(&smc_pnettable.lock);
+		list_for_each_entry(entry, &smc_pnettable.pnetlist, list) {
+			if (strncmp(dst->dev->name, entry->if_name, IFNAMSIZ))
+				continue;
+			/* only the first matching entry is evaluated */
+			smc_pnet_ib_dev_by_pnet(entry, smcibdev, ibport);
+			break;
+		}
+		read_unlock(&smc_pnettable.lock);
+	}
+	dst_release(dst);
+}
+
+/* Returns true if a specific ib_device and port is in the PNET table. */
+bool smc_pnet_exists_in_table(struct smc_ib_device *smcibdev, u8 ibport)
+{
+	struct smc_pnetentry *entry;
+	bool found = false;
+
+	read_lock(&smc_pnettable.lock);
+	list_for_each_entry(entry, &smc_pnettable.pnetlist, list) {
+		if (ibport != entry->ib_port)
+			continue;
+		if (!strncmp(smcibdev->ibdev->name, entry->ib_name,
+			     IB_DEVICE_NAME_MAX)) {
+			found = true;
+			break;
+		}
+	}
+	read_unlock(&smc_pnettable.lock);
+	return found;
+}
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 34f85f6..06dc307 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -13,6 +13,14 @@
 
 #define SMC_MAX_PORTS		2	/* Max # of ports */
 
+#include <net/sock.h>
+
+struct smc_ib_device;
+
+bool smc_pnet_exists_in_table(struct smc_ib_device *smcibdev, u8 ibport);
+void smc_pnet_find_roce_resource(struct sock *sk,
+				 struct smc_ib_device **smcibdev, u8 *ibport);
+
 int smc_pnet_init(void) __init;
 void smc_pnet_exit(void);
 
-- 
2.8.4

--
To unsubscribe from this list: send the line "unsubscribe linux-s390" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Kernel Development]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite Info]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Samba]     [Linux Media]     [Device Mapper]

  Powered by Linux