[PATCH 1/2] librdmacm: Add support for extended join multicast API V2

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Alex Vesker <valex@xxxxxxxxxxxx>
Subject: [PATCH 1/2] librdmacm: Add support for extended join multicast API V2

V1->V2
 - Fix comments to correctly refer to sendonly joins
 - Fix API spec.

Add rdma_join_multicast_ex(), which lets the caller specify
multicast (MC) join flags. librdmacm now supports the following
join flags, which the kernel UAPI already defines in
rdma_user_cm.h:

-Full Member:
 The initiator creates the multicast group (MCG) if it does not
 already exist, can send multicast messages to the group,
 and receives messages from the MCG.

-Send Only Full Member:
 The initiator creates the multicast group (MCG) if it does not
 already exist and can send multicast messages, but does not
 receive any messages from the MCG (send-only).

Tested-by: Christoph Lameter <cl@xxxxxxxxx>
Reviewed-by: Hal Rosenstock <hal@xxxxxxxxxxxx>
Signed-off-by: Alex Vesker <valex@xxxxxxxxxxxx>
Signed-off-by: Christoph Lameter <cl@xxxxxxxxx>
---

diff --git a/debian/librdmacm-dev.install b/debian/librdmacm-dev.install
index bfd8ad8..53dcb4b 100644
--- a/debian/librdmacm-dev.install
+++ b/debian/librdmacm-dev.install
@@ -33,6 +33,7 @@ usr/share/man/man3/rdma_get_send_comp.3
 usr/share/man/man3/rdma_get_src_port.3
 usr/share/man/man3/rdma_getaddrinfo.3
 usr/share/man/man3/rdma_join_multicast.3
+usr/share/man/man3/rdma_join_multicast_ex.3
 usr/share/man/man3/rdma_leave_multicast.3
 usr/share/man/man3/rdma_listen.3
 usr/share/man/man3/rdma_migrate_id.3
diff --git a/debian/librdmacm1.symbols b/debian/librdmacm1.symbols
index 7cec5c6..a7d8085 100644
--- a/debian/librdmacm1.symbols
+++ b/debian/librdmacm1.symbols
@@ -31,6 +31,7 @@ librdmacm.so.1 librdmacm1 #MINVER#
  rdma_get_src_port@RDMACM_1.0 1.0.19
  rdma_getaddrinfo@RDMACM_1.0 1.0.15
  rdma_join_multicast@RDMACM_1.0 1.0.15
+ rdma_join_multicast_ex@RDMACM_1.1 1.0.15
  rdma_leave_multicast@RDMACM_1.0 1.0.15
  rdma_listen@RDMACM_1.0 1.0.15
  rdma_migrate_id@RDMACM_1.0 1.0.15
diff --git a/librdmacm/cma.c b/librdmacm/cma.c
index 25ebaae..31cf11b 100644
--- a/librdmacm/cma.c
+++ b/librdmacm/cma.c
@@ -114,6 +114,7 @@ struct cma_multicast {
 	uint32_t	handle;
 	union ibv_gid	mgid;
 	uint16_t	mlid;
+	uint16_t	join_flags;
 	struct sockaddr_storage addr;
 };
 
@@ -1715,7 +1716,8 @@ int rdma_disconnect(struct rdma_cm_id *id)
 }
 
 static int rdma_join_multicast2(struct rdma_cm_id *id, struct sockaddr *addr,
-				socklen_t addrlen, void *context)
+				socklen_t addrlen, uint16_t join_flags,
+				void *context)
 {
 	struct ucma_abi_create_id_resp resp;
 	struct cma_id_private *id_priv;
@@ -1729,6 +1731,7 @@ static int rdma_join_multicast2(struct rdma_cm_id *id, struct sockaddr *addr,
 
 	mc->context = context;
 	mc->id_priv = id_priv;
+	mc->join_flags = join_flags;
 	memcpy(&mc->addr, addr, addrlen);
 	if (pthread_cond_init(&mc->cond, NULL)) {
 		ret = -1;
@@ -1748,7 +1751,7 @@ static int rdma_join_multicast2(struct rdma_cm_id *id, struct sockaddr *addr,
 		memcpy(&cmd.addr, addr, addrlen);
 		cmd.addr_size = addrlen;
 		cmd.uid = (uintptr_t) mc;
-		cmd.reserved = 0;
+		cmd.join_flags = join_flags;
 
 		ret = write(id->channel->fd, &cmd, sizeof cmd);
 		if (ret != sizeof cmd) {
@@ -1786,6 +1789,30 @@ err1:
 	return ret;
 }
 
+int rdma_join_multicast_ex(struct rdma_cm_id *id,
+			   struct rdma_cm_join_mc_attr_ex *mc_join_attr,
+			   void *context)
+{
+	int addrlen;
+	/* comp_mask values at or above the RESERVED bit are not supported. */
+	if (mc_join_attr->comp_mask >= RDMA_CM_JOIN_MC_ATTR_RESERVED)
+		return ERR(ENOTSUP);
+	/* The multicast address attribute is mandatory. */
+	if (!(mc_join_attr->comp_mask & RDMA_CM_JOIN_MC_ATTR_ADDRESS))
+		return ERR(EINVAL);
+	/* A zero length from ucma_addrlen() is rejected as an invalid address. */
+	addrlen = ucma_addrlen(mc_join_attr->addr);
+	if (!addrlen)
+		return ERR(EINVAL);
+	/* Join flags are mandatory too and must be below the RESERVED marker. */
+	if (!(mc_join_attr->comp_mask & RDMA_CM_JOIN_MC_ATTR_JOIN_FLAGS) ||
+	    (mc_join_attr->join_flags >= RDMA_MC_JOIN_FLAG_RESERVED))
+		return ERR(EINVAL);
+	/* Validation passed: hand off to the common join path with the flags. */
+	return rdma_join_multicast2(id, mc_join_attr->addr, addrlen,
+				    mc_join_attr->join_flags, context);
+}
+
 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 			void *context)
 {
@@ -1795,7 +1822,9 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 	if (!addrlen)
 		return ERR(EINVAL);
 
-	return rdma_join_multicast2(id, addr, addrlen, context);
+	return rdma_join_multicast2(id, addr, addrlen,
+				    RDMA_MC_JOIN_FLAG_FULLMEMBER,
+				    context);
 }
 
 int rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
@@ -1823,7 +1852,7 @@ int rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
 	if (!mc)
 		return ERR(EADDRNOTAVAIL);
 
-	if (id->qp)
+	if (id->qp && (mc->join_flags != RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER))
 		ibv_detach_mcast(id->qp, &mc->mgid, mc->mlid);
 	
 	CMA_INIT_CMD_RESP(&cmd, sizeof cmd, LEAVE_MCAST, &resp, sizeof resp);
@@ -2011,6 +2040,10 @@ static int ucma_process_join(struct cma_event *evt)
 	if (!evt->id_priv->id.qp)
 		return 0;
 
+	/* Don't attach QP to multicast if joined as send only full member */
+	if (evt->mc->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
+		return 0;
+
 	return rdma_seterrno(ibv_attach_mcast(evt->id_priv->id.qp,
 					      &evt->mc->mgid, evt->mc->mlid));
 }
diff --git a/librdmacm/librdmacm.map b/librdmacm/librdmacm.map
index 65c0492..5813d74 100644
--- a/librdmacm/librdmacm.map
+++ b/librdmacm/librdmacm.map
@@ -71,3 +71,8 @@ RDMACM_1.0 {
 		rdma_create_qp_ex;
 	local: *;
 };
+
+RDMACM_1.1 {
+	global:
+		rdma_join_multicast_ex;
+} RDMACM_1.0;
diff --git a/librdmacm/man/CMakeLists.txt b/librdmacm/man/CMakeLists.txt
index d4d54c5..d016c92 100644
--- a/librdmacm/man/CMakeLists.txt
+++ b/librdmacm/man/CMakeLists.txt
@@ -33,6 +33,7 @@ rdma_man_pages(
   rdma_get_src_port.3
   rdma_getaddrinfo.3
   rdma_join_multicast.3
+  rdma_join_multicast_ex.3
   rdma_leave_multicast.3
   rdma_listen.3
   rdma_migrate_id.3
diff --git a/librdmacm/man/rdma_join_multicast_ex.3 b/librdmacm/man/rdma_join_multicast_ex.3
new file mode 100644
index 0000000..b095376
--- /dev/null
+++ b/librdmacm/man/rdma_join_multicast_ex.3
@@ -0,0 +1,66 @@
+.TH "RDMA_JOIN_MULTICAST_EX" 3 "2016-06-21" "librdmacm" "Librdmacm Programmer's Manual" librdmacm
+.SH NAME
+rdma_join_multicast_ex \- Joins a multicast group with extended options.
+.SH SYNOPSIS
+.B "#include <rdma/rdma_cma.h>"
+.P
+.B "int" rdma_join_multicast_ex
+.BI "(struct rdma_cm_id *" id ","
+.BI "struct rdma_cm_join_mc_attr_ex *" mc_join_attr ","
+.BI "void *" context ");"
+.SH ARGUMENTS
+.IP "id" 20
+Communication identifier associated with the request.
+.IP "mc_join_attr" 20
+Pointer to an rdma_cm_join_mc_attr_ex struct, as defined in <rdma/rdma_cma.h>, holding the join parameters.
+.IP "context" 20
+User-defined context associated with the join request.
+.SH "DESCRIPTION"
+Joins a multicast group (MCG) with extended options.
+Currently supporting MC join with a specified join flag.
+.P
+.nf
+struct rdma_cm_join_mc_attr_ex {
+.in +8
+uint32_t                comp_mask;      /* Bitwise OR between "rdma_cm_join_mc_attr_mask" enum */
+uint32_t                join_flags;     /* Use a single flag from "rdma_cm_mc_join_flags" enum */
+struct sockaddr         *addr;          /* Multicast address identifying the group to join */
+.in -8
+};
+.fi
+.P
+The supported join flags are:
+.P
+.B RDMA_MC_JOIN_FLAG_FULLMEMBER
+- Create multicast group, Send multicast messages to MCG, Receive multicast messages from MCG.
+.P
+.B RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER
+- Create multicast group, Send multicast messages to MCG, Don't receive multicast messages from MCG (send-only).
+.P
+Initiating an MC join as "Send Only Full Member" on InfiniBand requires SM support; without such support the join will fail.
+.P
+Initiating an MC join as "Send Only Full Member" on RoCEv2/ETH will not send an IGMP message, unlike a Full Member MC join.
+When "Send Only Full Member" is used, the QP will not be attached to the MCG.
+.P
+.SH "RETURN VALUE"
+Returns 0 on success, or -1 on error.  If an error occurs, errno will be
+set to indicate the failure reason.
+.SH "NOTES"
+Before joining a multicast group, the rdma_cm_id must be bound to
+an RDMA device by calling rdma_bind_addr or rdma_resolve_addr.  Use of
+rdma_resolve_addr requires the local routing tables to resolve the
+multicast address to an RDMA device, unless a specific source address
+is provided.  The user must call rdma_leave_multicast to leave the
+multicast group and release any multicast resources.  After the join
+operation completes, if a QP is associated with the rdma_cm_id and the join
+was not made as a send-only full member, the QP is automatically attached to
+the multicast group when the multicast event is retrieved by the user.  If no QP
+is associated, the user is responsible for calling ibv_attach_mcast to bind one.
+The join context is returned to the user through the private_data
+field in the rdma_cm_event.
+.SH "SEE ALSO"
+rdma_join_multicast(3), rdma_leave_multicast(3), rdma_bind_addr(3), rdma_resolve_addr(3), rdma_create_qp(3),
+rdma_get_cm_event(3)
+.SH "AUTHORS"
+.TP
+Alex Vesker <valex@xxxxxxxxxxxx>
diff --git a/librdmacm/rdma_cma.h b/librdmacm/rdma_cma.h
index e0fc58e..2096a81 100644
--- a/librdmacm/rdma_cma.h
+++ b/librdmacm/rdma_cma.h
@@ -197,6 +197,29 @@ struct rdma_addrinfo {
 	struct rdma_addrinfo	*ai_next;
 };
 
+/* Multicast join compatibility mask attributes */
+enum rdma_cm_join_mc_attr_mask {
+	RDMA_CM_JOIN_MC_ATTR_ADDRESS	= 1 << 0,
+	RDMA_CM_JOIN_MC_ATTR_JOIN_FLAGS	= 1 << 1,
+	RDMA_CM_JOIN_MC_ATTR_RESERVED	= 1 << 2,
+};
+
+/* Multicast join flags */
+enum rdma_cm_mc_join_flags {
+	RDMA_MC_JOIN_FLAG_FULLMEMBER,
+	RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER,
+	RDMA_MC_JOIN_FLAG_RESERVED,
+};
+
+struct rdma_cm_join_mc_attr_ex {
+	/* Bitwise OR between "rdma_cm_join_mc_attr_mask" enum */
+	uint32_t comp_mask;
+	/* Use a flag from "rdma_cm_mc_join_flags" enum */
+	uint32_t join_flags;
+	/* Multicast address identifying the group to join */
+	struct sockaddr *addr;
+};
+
 /**
  * rdma_create_event_channel - Open a channel used to report communication events.
  * Description:
@@ -555,6 +578,30 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 int rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
 
 /**
+ * rdma_join_multicast_ex - Joins a multicast group with options.
+ * @id: Communication identifier associated with the request.
+ * @mc_join_attr: Extensive struct containing multicast join parameters.
+ * @context: User-defined context associated with the join request.
+ * Description:
+ *  Joins a multicast group with options. Currently supporting MC join flags.
+ *  The QP will be attached based on the given join flag.
+ *  Join message will be sent according to the join flag.
+ * Notes:
+ *  Before joining a multicast group, the rdma_cm_id must be bound to
+ *  an RDMA device by calling rdma_bind_addr or rdma_resolve_addr.  Use of
+ *  rdma_resolve_addr requires the local routing tables to resolve the
+ *  multicast address to an RDMA device.  The user must call
+ *  rdma_leave_multicast to leave the multicast group and release any
+ *  multicast resources.  The context is returned to the user through
+ *  the private_data field in the rdma_cm_event.
+ * See also:
+ *  rdma_leave_multicast, rdma_bind_addr, rdma_resolve_addr, rdma_create_qp
+ */
+int rdma_join_multicast_ex(struct rdma_cm_id *id,
+			   struct rdma_cm_join_mc_attr_ex *mc_join_attr,
+			   void *context);
+
+/**
  * rdma_get_cm_event - Retrieves the next pending communication event.
  * @channel: Event channel to check for events.
  * @event: Allocated information about the next communication event.
diff --git a/librdmacm/rdma_cma_abi.h b/librdmacm/rdma_cma_abi.h
index 29d6f0d..deb37a4 100644
--- a/librdmacm/rdma_cma_abi.h
+++ b/librdmacm/rdma_cma_abi.h
@@ -302,7 +302,7 @@ struct ucma_abi_join_mcast {
 	__u64 uid;
 	__u32 id;
 	__u16 addr_size;
-	__u16 reserved;
+	__u16 join_flags;
 	struct sockaddr_storage addr;
 };
 
-- 
1.7.1


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux