[PATCH 2/9] ib_srpt: Avoid failed multipart RDMA transfers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Bart Van Assche <bvanassche@xxxxxxx>

Multipart RDMA transfers can fail after one or more but not all RDMA
transfers have been initiated because either an IB cable has been pulled
or the ib_srpt kernel module has been unloaded while an RDMA transfer is
being set up.

This is a bugfix port from SCST svn r3632 as recommended by Bart Van
Assche.

Cc: Bart Van Assche <bvanassche@xxxxxxx>
Cc: Roland Dreier <roland@xxxxxxxxxxxxxxx>
Signed-off-by: Nicholas A. Bellinger <nab@xxxxxxxxxxxxxxx>
---
 drivers/infiniband/ulp/srpt/ib_srpt.c |  101 +++++++++++++++++++++-----------
 drivers/infiniband/ulp/srpt/ib_srpt.h |   26 +++++++-
 2 files changed, 88 insertions(+), 39 deletions(-)

diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 383994d..6aba709 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -807,7 +807,7 @@ static int srpt_post_recv(struct srpt_device *sdev,
 	struct ib_recv_wr wr, *bad_wr;
 
 	BUG_ON(!sdev);
-	wr.wr_id = encode_wr_id(IB_WC_RECV, ioctx->ioctx.index);
+	wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index);
 
 	list.addr = ioctx->ioctx.dma;
 	list.length = srp_max_req_size;
@@ -849,7 +849,7 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
 	list.lkey = sdev->mr->lkey;
 
 	wr.next = NULL;
-	wr.wr_id = encode_wr_id(IB_WC_SEND, ioctx->ioctx.index);
+	wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index);
 	wr.sg_list = &list;
 	wr.num_sge = 1;
 	wr.opcode = IB_WR_SEND;
@@ -1494,17 +1494,26 @@ static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
  * check_stop_free() callback.
  */
 static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
-				  struct srpt_send_ioctx *ioctx)
+				  struct srpt_send_ioctx *ioctx,
+				  enum srpt_opcode opcode)
 {
 	WARN_ON(ioctx->n_rdma <= 0);
 	atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
 
-	if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
-					SRPT_STATE_DATA_IN))
-		transport_generic_handle_data(&ioctx->cmd);
-	else
-		printk(KERN_ERR "%s[%d]: wrong state = %d\n", __func__,
-		       __LINE__, srpt_get_cmd_state(ioctx));
+	if (opcode == SRPT_RDMA_READ_LAST) {
+		if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
+						SRPT_STATE_DATA_IN))
+			transport_generic_handle_data(&ioctx->cmd);
+		else
+			printk(KERN_ERR "%s[%d]: wrong state = %d\n", __func__,
+			       __LINE__, srpt_get_cmd_state(ioctx));
+	} else if (opcode == SRPT_RDMA_ABORT) {
+		ioctx->rdma_aborted = true;
+	} else {
+		WARN_ON(opcode != SRPT_RDMA_READ_LAST);
+		printk(KERN_ERR "%s[%d]: scmnd == NULL (opcode %d)", __func__,
+				__LINE__, opcode);
+	}
 }
 
 /**
@@ -1512,7 +1521,7 @@ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
  */
 static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
 				      struct srpt_send_ioctx *ioctx,
-				      u8 opcode)
+				      enum srpt_opcode opcode)
 {
 	struct se_cmd *cmd;
 	enum srpt_command_state state;
@@ -1520,7 +1529,7 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
 	cmd = &ioctx->cmd;
 	state = srpt_get_cmd_state(ioctx);
 	switch (opcode) {
-	case IB_WC_RDMA_READ:
+	case SRPT_RDMA_READ_LAST:
 		if (ioctx->n_rdma <= 0) {
 			printk(KERN_ERR "Received invalid RDMA read"
 			       " error completion with idx %d\n",
@@ -1534,9 +1543,8 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
 			printk(KERN_ERR "%s[%d]: wrong state = %d\n",
 			       __func__, __LINE__, state);
 		break;
-	case IB_WC_RDMA_WRITE:
-		atomic_set(&ioctx->cmd.transport_lun_stop,
-			   1);
+	case SRPT_RDMA_WRITE_LAST:
+		atomic_set(&ioctx->cmd.transport_lun_stop, 1);
 		break;
 	default:
 		printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__,
@@ -2041,33 +2049,32 @@ static void srpt_process_send_completion(struct ib_cq *cq,
 {
 	struct srpt_send_ioctx *send_ioctx;
 	uint32_t index;
-	u8 opcode;
+	enum srpt_opcode opcode;
 
 	index = idx_from_wr_id(wc->wr_id);
 	opcode = opcode_from_wr_id(wc->wr_id);
 	send_ioctx = ch->ioctx_ring[index];
 	if (wc->status == IB_WC_SUCCESS) {
-		if (opcode == IB_WC_SEND)
+		if (opcode == SRPT_SEND)
 			srpt_handle_send_comp(ch, send_ioctx);
 		else {
-			WARN_ON(wc->opcode != IB_WC_RDMA_READ);
-			srpt_handle_rdma_comp(ch, send_ioctx);
+			WARN_ON(opcode != SRPT_RDMA_ABORT &&
+				wc->opcode != IB_WC_RDMA_READ);
+			srpt_handle_rdma_comp(ch, send_ioctx, opcode);
 		}
 	} else {
-		if (opcode == IB_WC_SEND) {
+		if (opcode == SRPT_SEND) {
 			printk(KERN_INFO "sending response for idx %u failed"
 			       " with status %d\n", index, wc->status);
 			srpt_handle_send_err_comp(ch, wc->wr_id);
-		} else {
-			printk(KERN_INFO "RDMA %s for idx %u failed with status"
-			       " %d\n", opcode == IB_WC_RDMA_READ ? "read"
-			       : opcode == IB_WC_RDMA_WRITE ? "write"
-			       : "???", index, wc->status);
+		} else if (opcode != SRPT_RDMA_MID) {
+			printk(KERN_INFO "RDMA t %d for idx %u failed with"
+				" status %d", opcode, index, wc->status);
 			srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
 		}
 	}
 
-	while (unlikely(opcode == IB_WC_SEND
+	while (unlikely(opcode == SRPT_SEND
 			&& !list_empty(&ch->cmd_wait_list)
 			&& srpt_get_ch_state(ch) == CH_LIVE
 			&& (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) {
@@ -2091,7 +2098,7 @@ static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch)
 	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 	while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) {
 		for (i = 0; i < n; i++) {
-			if (opcode_from_wr_id(wc[i].wr_id) & IB_WC_RECV)
+			if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV)
 				srpt_process_rcv_completion(cq, ch, &wc[i]);
 			else
 				srpt_process_send_completion(cq, ch, &wc[i]);
@@ -2882,32 +2889,37 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
 	int ret;
 	int sq_wr_avail;
 	enum dma_data_direction dir;
+	const int n_rdma = ioctx->n_rdma;
 
 	dir = ioctx->cmd.data_direction;
 	if (dir == DMA_TO_DEVICE) {
 		/* write */
 		ret = -ENOMEM;
-		sq_wr_avail = atomic_sub_return(ioctx->n_rdma,
-						 &ch->sq_wr_avail);
+		sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail);
 		if (sq_wr_avail < 0) {
 			printk(KERN_WARNING "IB send queue full (needed %d)\n",
-			       ioctx->n_rdma);
+			       n_rdma);
 			goto out;
 		}
 	}
 
+	ioctx->rdma_aborted = false;
 	ret = 0;
 	riu = ioctx->rdma_ius;
 	memset(&wr, 0, sizeof wr);
 
-	for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
+	for (i = 0; i < n_rdma; ++i, ++riu) {
 		if (dir == DMA_FROM_DEVICE) {
 			wr.opcode = IB_WR_RDMA_WRITE;
-			wr.wr_id = encode_wr_id(IB_WC_RDMA_WRITE,
+			wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
+						SRPT_RDMA_WRITE_LAST :
+						SRPT_RDMA_MID,
 						ioctx->ioctx.index);
 		} else {
 			wr.opcode = IB_WR_RDMA_READ;
-			wr.wr_id = encode_wr_id(IB_WC_RDMA_READ,
+			wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
+						SRPT_RDMA_READ_LAST :
+						SRPT_RDMA_MID,
 						ioctx->ioctx.index);
 		}
 		wr.next = NULL;
@@ -2917,17 +2929,36 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
 		wr.sg_list = riu->sge;
 
 		/* only get completion event for the last rdma write */
-		if (i == (ioctx->n_rdma - 1) && dir == DMA_TO_DEVICE)
+		if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE)
 			wr.send_flags = IB_SEND_SIGNALED;
 
 		ret = ib_post_send(ch->qp, &wr, &bad_wr);
 		if (ret)
-			goto out;
+			break;
 	}
 
+	if (ret)
+		printk(KERN_ERR "%s[%d]: ib_post_send() returned %d for %d/%d",
+				 __func__, __LINE__, ret, i, n_rdma);
+	if (ret && i > 0) {
+		wr.num_sge = 0;
+		wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
+		wr.send_flags = IB_SEND_SIGNALED;
+		while (ch->state == CH_LIVE &&
+			ib_post_send(ch->qp, &wr, &bad_wr) != 0) {
+			printk(KERN_INFO "Trying to abort failed RDMA transfer [%d]",
+				ioctx->ioctx.index);
+			msleep(1000);
+		}
+		while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
+			printk(KERN_INFO "Waiting until RDMA abort finished [%d]",
+				ioctx->ioctx.index);
+			msleep(1000);
+		}
+	}
 out:
 	if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
-		atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
+		atomic_add(n_rdma, &ch->sq_wr_avail);
 	return ret;
 }
 
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 045fb7b..59ee2d7 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -127,12 +127,27 @@ enum {
 	DEFAULT_MAX_RDMA_SIZE = 65536,
 };
 
+enum srpt_opcode {
+	SRPT_RECV,
+	SRPT_SEND,
+	SRPT_RDMA_MID,
+	SRPT_RDMA_ABORT,
+	SRPT_RDMA_READ_LAST,
+	SRPT_RDMA_WRITE_LAST,
+};
+
 static inline u64 encode_wr_id(u8 opcode, u32 idx)
-{ return ((u64)opcode << 32) | idx; }
-static inline u8 opcode_from_wr_id(u64 wr_id)
-{ return wr_id >> 32; }
+{
+	return ((u64)opcode << 32) | idx;
+}
+static inline enum srpt_opcode opcode_from_wr_id(u64 wr_id)
+{
+	return wr_id >> 32;
+}
 static inline u32 idx_from_wr_id(u64 wr_id)
-{ return (u32)wr_id; }
+{
+	return (u32)wr_id;
+}
 
 struct rdma_iu {
 	u64		raddr;
@@ -204,6 +219,8 @@ struct srpt_recv_ioctx {
  * @tag:         Tag of the received SRP information unit.
  * @spinlock:    Protects 'state'.
  * @state:       I/O context state.
+ * @rdma_aborted: If initiating a multipart RDMA transfer failed, whether
+ * 		 the already initiated transfers have finished.
  * @cmd:         Target core command data structure.
  * @sense_data:  SCSI sense data.
  */
@@ -218,6 +235,7 @@ struct srpt_send_ioctx {
 	struct list_head	free_list;
 	spinlock_t		spinlock;
 	enum srpt_command_state	state;
+	bool			rdma_aborted;
 	struct se_cmd		cmd;
 	struct completion	tx_done;
 	u64			tag;
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]
  Powered by Linux