Re: [PATCH 20/31] rdma/siw: implement non-blocking connect.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



-----"Stefan Metzmacher" <metze@xxxxxxxxx> wrote: -----

>To: "Bernard Metzler" <bmt@xxxxxxxxxxxxxx>
>From: "Stefan Metzmacher" <metze@xxxxxxxxx>
>Date: 05/07/2021 01:39AM
>Cc: linux-rdma@xxxxxxxxxxxxxxx, "Stefan Metzmacher" <metze@xxxxxxxxx>
>Subject: [EXTERNAL] [PATCH 20/31] rdma/siw: implement non-blocking
>connect.
>
>This is very important in order to prevent deadlocks.
>
>The RDMA application layer expects rdma_connect() to be non-blocking
>as the completion is handled via RDMA_CM_EVENT_ESTABLISHED and
>other async events. It's not unlikely to hold a lock during
>the rdma_connect() call.
>
>Without out this a connection attempt to a non-existing/reachable
>server block until the very long tcp timeout hits.
>The application layer had no chance to have its own timeout handler
>as that would just deadlock with the already blocking rdma_connect().
>
>Fixes: 6c52fdc244b5 ("rdma/siw: connection management")
>Signed-off-by: Stefan Metzmacher <metze@xxxxxxxxx>
>Cc: Bernard Metzler <bmt@xxxxxxxxxxxxxx>
>Cc: linux-rdma@xxxxxxxxxxxxxxx
>---
> drivers/infiniband/sw/siw/siw_cm.c | 114
>+++++++++++++++++++++--------
> drivers/infiniband/sw/siw/siw_cm.h |   1 +
> 2 files changed, 85 insertions(+), 30 deletions(-)
>
>diff --git a/drivers/infiniband/sw/siw/siw_cm.c
>b/drivers/infiniband/sw/siw/siw_cm.c
>index cf0f881c6793..9a550f040678 100644
>--- a/drivers/infiniband/sw/siw/siw_cm.c
>+++ b/drivers/infiniband/sw/siw/siw_cm.c
>@@ -37,6 +37,7 @@ static void siw_cm_llp_write_space(struct sock *s);
> static void siw_cm_llp_error_report(struct sock *s);
> static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type
>reason,
> 			 int status);
>+static void siw_connected(struct siw_cep *cep);
> 
> static void siw_sk_assign_cm_upcalls(struct sock *sk)
> {
>@@ -1141,6 +1142,10 @@ static void siw_cm_work_handler(struct
>work_struct *w)
> 		siw_accept_newconn(cep);
> 		break;
> 
>+	case SIW_CM_WORK_CONNECTED:
>+		siw_connected(cep);
>+		break;
>+
> 	case SIW_CM_WORK_READ_MPAHDR:
> 		if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) {
> 			if (cep->listen_cep) {
>@@ -1306,6 +1311,7 @@ static void siw_cm_llp_data_ready(struct sock
>*sk)
> 	switch (cep->state) {
> 	case SIW_EPSTATE_RDMA_MODE:
> 	case SIW_EPSTATE_LISTENING:
>+	case SIW_EPSTATE_CONNECTING:
> 		break;
> 
> 	case SIW_EPSTATE_AWAIT_MPAREQ:
>@@ -1359,12 +1365,26 @@ static void siw_cm_llp_state_change(struct
>sock *sk)
> 
> 	switch (sk->sk_state) {
> 	case TCP_ESTABLISHED:
>-		/*
>-		 * handle accepting socket as special case where only
>-		 * new connection is possible
>-		 */
>-		siw_cm_queue_work(cep, SIW_CM_WORK_ACCEPT);
>-		break;
>+		if (cep->state == SIW_EPSTATE_CONNECTING) {
>+			/*
>+			 * handle accepting socket as special case where only
>+			 * new connection is possible
>+			 */
>+			siw_cm_queue_work(cep, SIW_CM_WORK_CONNECTED);
>+			break;
>+
>+		} else if (cep->state == SIW_EPSTATE_LISTENING) {
>+			/*
>+			 * handle accepting socket as special case where only
>+			 * new connection is possible
>+			 */
>+			siw_cm_queue_work(cep, SIW_CM_WORK_ACCEPT);
>+			break;
>+		}
>+		siw_dbg_cep(cep,
>+			    "unexpected socket state %d with cep state %d\n",
>+			    sk->sk_state, cep->state);
>+		/* fall through */
> 
> 	case TCP_CLOSE:
> 	case TCP_CLOSE_WAIT:
>@@ -1383,7 +1403,7 @@ static void siw_cm_llp_state_change(struct sock
>*sk)
> static int kernel_bindconnect(struct socket *s, struct sockaddr
>*laddr,
> 			      struct sockaddr *raddr, bool afonly)
> {
>-	int rv, flags = 0;
>+	int rv;
> 	size_t size = laddr->sa_family == AF_INET ?
> 		sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
> 
>@@ -1402,7 +1422,7 @@ static int kernel_bindconnect(struct socket *s,
>struct sockaddr *laddr,
> 	if (rv < 0)
> 		return rv;
> 
>-	rv = kernel_connect(s, raddr, size, flags);
>+	rv = kernel_connect(s, raddr, size, O_NONBLOCK);
> 
> 	return rv < 0 ? rv : 0;
> }
>@@ -1547,36 +1567,27 @@ int siw_connect(struct iw_cm_id *id, struct
>iw_cm_conn_param *params)
> 		goto error;
> 	}
> 
>-	/*
>-	 * NOTE: For simplification, connect() is called in blocking
>-	 * mode. Might be reconsidered for async connection setup at
>-	 * TCP level.
>-	 */
> 	rv = kernel_bindconnect(s, laddr, raddr, id->afonly);
>+	if (rv == -EINPROGRESS) {
>+		siw_dbg_qp(qp, "kernel_bindconnect: EINPROGRESS\n");
>+		rv = 0;
>+	}
> 	if (rv != 0) {
> 		siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv);
> 		goto error;
> 	}
>-	if (siw_tcp_nagle == false)
>-		tcp_sock_set_nodelay(s->sk);
>-
>-	cep->state = SIW_EPSTATE_AWAIT_MPAREP;
> 
>-	rv = siw_send_mpareqrep(cep, cep->mpa.pdata,
>-				cep->mpa.hdr.params.pd_len);
> 	/*
>-	 * Reset private data.
>+	 * The rest will be done by siw_connected()

Please use more concise language, giving some details.
The 'rest' and 'everything' refers to some state of code
understanding we cannot assume for every reader ;)


>+	 *
>+	 * siw_cm_llp_state_change() will detect
>+	 * TCP_ESTABLISHED and schedules SIW_CM_WORK_CONNECTED,
>+	 * which will finally call siw_connected().
>+	 *
>+	 * As siw_cm_llp_state_change() handles everything
>+	 * siw_cm_llp_data_ready() can be a noop for
>+	 * SIW_EPSTATE_CONNECTING.
> 	 */
>-	if (cep->mpa.hdr.params.pd_len) {
>-		cep->mpa.hdr.params.pd_len = 0;
>-		kfree(cep->mpa.pdata);
>-		cep->mpa.pdata = NULL;
>-	}
>-
>-	if (rv < 0) {
>-		goto error;
>-	}
>-
> 	siw_dbg_cep(cep, "[QP %u]: exit\n", qp_id(qp));
> 	siw_cep_set_free(cep);
> 	return 0;
>@@ -1604,6 +1615,49 @@ int siw_connect(struct iw_cm_id *id, struct
>iw_cm_conn_param *params)
> 	return rv;
> }
> 
>+static void siw_connected(struct siw_cep *cep)
>+{
>+	struct siw_qp *qp = cep->qp;
>+	struct socket *s = cep->sock;
>+	int rv = -ECONNABORTED;
>+
>+	/*
>+	 * already called with
>+	 * siw_cep_set_inuse(cep);
>+	 */
>+
>+	if (cep->state != SIW_EPSTATE_CONNECTING)
>+		goto error;
>+
>+	if (siw_tcp_nagle == false)
>+		tcp_sock_set_nodelay(s->sk);
>+
>+	cep->state = SIW_EPSTATE_AWAIT_MPAREP;
>+
>+	rv = siw_send_mpareqrep(cep, cep->mpa.pdata,
>+				cep->mpa.hdr.params.pd_len);
>+	/*
>+	 * Reset private data.
>+	 */
>+	if (cep->mpa.hdr.params.pd_len) {
>+		cep->mpa.hdr.params.pd_len = 0;
>+		kfree(cep->mpa.pdata);
>+		cep->mpa.pdata = NULL;
>+	}
>+
>+	if (rv < 0) {
>+		goto error;
>+	}
>+
>+	siw_dbg_cep(cep, "[QP %u]: exit\n", qp_id(qp));
>+	return;
>+
>+error:
>+	siw_dbg_cep(cep, "[QP %u]: exit, error %d\n", qp_id(qp), rv);
>+	siw_qp_cm_drop(qp, 1);
>+	return;
>+}
>+
> /*
>  * siw_accept - Let SoftiWARP accept an RDMA connection request
>  *
>diff --git a/drivers/infiniband/sw/siw/siw_cm.h
>b/drivers/infiniband/sw/siw/siw_cm.h
>index 4f6219bd746b..62c9947999ac 100644
>--- a/drivers/infiniband/sw/siw/siw_cm.h
>+++ b/drivers/infiniband/sw/siw/siw_cm.h
>@@ -78,6 +78,7 @@ struct siw_cep {
> 
> enum siw_work_type {
> 	SIW_CM_WORK_ACCEPT = 1,
>+	SIW_CM_WORK_CONNECTED,
> 	SIW_CM_WORK_READ_MPAHDR,
> 	SIW_CM_WORK_CLOSE_LLP, /* close socket */
> 	SIW_CM_WORK_PEER_CLOSE, /* socket indicated peer close */
>-- 
>2.25.1
>
>





[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux