[PATCH for-next 20/23] IB/hfi1: Add interlock between TID RDMA WRITE and other requests

From: Kaike Wan <kaike.wan@xxxxxxxxx>

This locking mechanism is designed to prevent various memory corruption
scenarios from occurring when requests are pipelined, especially when
TID RDMA WRITE requests are interleaved with TID RDMA READ requests:
1. READ-AFTER-READ;
2. READ-AFTER-WRITE;
3. WRITE-AFTER-READ;
4. WRITE-AFTER-WRITE.
When memory corruption is likely, a request will be held back until
previous requests have been completed.
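
To make the fencing rule concrete, here is a minimal stand-alone sketch
(not part of the patch): struct req, must_fence(), and the opcode
strings are simplified, hypothetical stand-ins for the driver's
rvt_swqe/tid_rdma_request types. It models the predicate used
throughout this series: a request is held back while the preceding TID
RDMA WRITE still has unacknowledged segments, i.e. while
ack_seg != total_segs.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Simplified model of a queued request (hypothetical; stands in for
 * struct rvt_swqe plus the tid_rdma_request it carries).
 */
struct req {
	const char *opcode;      /* e.g. "TID_RDMA_WRITE" */
	unsigned int ack_seg;    /* segments acknowledged so far */
	unsigned int total_segs; /* total segments in the request */
};

/* The interlock predicate: fence a new request while the previous
 * TID RDMA WRITE still has outstanding (unacked) segments.
 */
static bool must_fence(const struct req *prev)
{
	return prev && !strcmp(prev->opcode, "TID_RDMA_WRITE") &&
	       prev->ack_seg != prev->total_segs;
}

int main(void)
{
	struct req w = { "TID_RDMA_WRITE", 2, 4 };

	printf("fenced: %d\n", must_fence(&w)); /* 1: 2 of 4 segs acked */
	w.ack_seg = 4;
	printf("fenced: %d\n", must_fence(&w)); /* 0: write fully acked */
	return 0;
}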

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@xxxxxxxxx>
Signed-off-by: Mitko Haralanov <mitko.haralanov@xxxxxxxxx>
Signed-off-by: Kaike Wan <kaike.wan@xxxxxxxxx>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@xxxxxxxxx>
---
 drivers/infiniband/hw/hfi1/rc.c       |    6 ++++
 drivers/infiniband/hw/hfi1/tid_rdma.c |   49 +++++++++++++++++++++++++++++++++-
 drivers/infiniband/hw/hfi1/tid_rdma.h |    9 ++++++
 3 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 6d2abea..cfb8633 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -173,6 +173,12 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 		}
 
 		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+		/* Check for tid write fence */
+		if ((qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK) ||
+		    hfi1_tid_rdma_ack_interlock(qp, e)) {
+			iowait_set_flag(&qpriv->s_iowait, IOWAIT_PENDING_IB);
+			goto bail;
+		}
 		if (e->opcode == OP(RDMA_READ_REQUEST)) {
 			/*
 			 * If a RDMA read response is being resent and
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index f786c2a..7ea8e21 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -2179,6 +2179,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
 			req->state = TID_REQUEST_RESEND;
 			req->cur_seg = req->comp_seg;
 		}
+		qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
 	}
 	/* Re-process old requests.*/
 	if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
@@ -3225,6 +3226,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
 	struct rvt_swqe *prev;
 	struct hfi1_qp_priv *priv = qp->priv;
 	u32 s_prev;
+	struct tid_rdma_request *req;
 
 	s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
 	prev = rvt_get_swqe_ptr(qp, s_prev);
@@ -3236,14 +3238,31 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
 	case IB_WR_ATOMIC_CMP_AND_SWP:
 	case IB_WR_ATOMIC_FETCH_AND_ADD:
 	case IB_WR_RDMA_WRITE:
+		switch (prev->wr.opcode) {
+		case IB_WR_TID_RDMA_WRITE:
+			req = wqe_to_tid_req(prev);
+			if (req->ack_seg != req->total_segs)
+				goto interlock;
+			break;
+		default:
+			break;
+		}
+		break;
 	case IB_WR_RDMA_READ:
-		break;
+		if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
+			break;
+		/* fall through */
 	case IB_WR_TID_RDMA_READ:
 		switch (prev->wr.opcode) {
 		case IB_WR_RDMA_READ:
 			if (qp->s_acked != qp->s_cur)
 				goto interlock;
 			break;
+		case IB_WR_TID_RDMA_WRITE:
+			req = wqe_to_tid_req(prev);
+			if (req->ack_seg != req->total_segs)
+				goto interlock;
+			break;
 		default:
 			break;
 		}
@@ -5153,7 +5169,9 @@ static int make_tid_rdma_ack(struct rvt_qp *qp,
 		e = &qp->s_ack_queue[qpriv->r_tid_ack];
 		req = ack_to_tid_req(e);
 		flow = req->acked_tail;
-	}
+	} else if (req->ack_seg == req->total_segs &&
+		   qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK)
+		qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
 
 	hwords += hfi1_build_tid_rdma_write_ack(qp, e, ohdr, flow, &bth1,
 						&bth2);
@@ -5306,3 +5324,27 @@ bool hfi1_schedule_tid_send(struct rvt_qp *qp)
 				IOWAIT_PENDING_TID);
 	return false;
 }
+
+bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e)
+{
+	struct rvt_ack_entry *prev;
+	struct tid_rdma_request *req;
+	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
+	struct hfi1_qp_priv *priv = qp->priv;
+	u32 s_prev;
+
+	s_prev = qp->s_tail_ack_queue == 0 ? rvt_size_atomic(&dev->rdi) :
+		(qp->s_tail_ack_queue - 1);
+	prev = &qp->s_ack_queue[s_prev];
+
+	if ((e->opcode == TID_OP(READ_REQ) ||
+	     e->opcode == OP(RDMA_READ_REQUEST)) &&
+	    prev->opcode == TID_OP(WRITE_REQ)) {
+		req = ack_to_tid_req(prev);
+		if (req->ack_seg != req->total_segs) {
+			priv->s_flags |= HFI1_R_TID_WAIT_INTERLCK;
+			return true;
+		}
+	}
+	return false;
+}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 7f8f17b..4446818 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -25,6 +25,7 @@
  * s_flags, there are no collisions.
  *
  * HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
+ * HFI1_R_TID_WAIT_INTERLCK - QP is waiting for responder interlock
  */
 #define HFI1_S_TID_BUSY_SET       BIT(0)
 /* BIT(1) reserved for RVT_S_BUSY. */
@@ -32,9 +33,15 @@
 /* BIT(3) reserved for RVT_S_RESP_PENDING. */
 /* BIT(4) reserved for RVT_S_ACK_PENDING. */
 #define HFI1_S_TID_WAIT_INTERLCK  BIT(5)
+#define HFI1_R_TID_WAIT_INTERLCK  BIT(6)
 /* BIT(7) - BIT(15) reserved for RVT_S_WAIT_*. */
+/* BIT(16) reserved for RVT_S_SEND_ONE */
 #define HFI1_S_TID_RETRY_TIMER    BIT(17)
+/* BIT(18) reserved for RVT_S_ECN. */
 #define HFI1_R_TID_SW_PSN         BIT(19)
+/* BIT(26) reserved for HFI1_S_WAIT_HALT */
+/* BIT(27) reserved for HFI1_S_WAIT_TID_RESP */
+/* BIT(28) reserved for HFI1_S_WAIT_TID_SPACE */
 
 /*
  * Unlike regular IB RDMA VERBS, which do not require an entry
@@ -309,4 +316,6 @@ u32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe,
 
 bool hfi1_schedule_tid_send(struct rvt_qp *qp);
 
+bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e);
+
 #endif /* HFI1_TID_RDMA_H */
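
A note on the predecessor computation in hfi1_tid_rdma_ack_interlock()
above: the ack queue is a ring whose valid indices run from 0 through
rvt_size_atomic() (the diff relies on one slot beyond the atomic limit
being addressable), so the entry before slot 0 wraps to the last valid
index rather than to size - 1. Below is a small stand-alone model of
just that wraparound; SIZE_ATOMIC and prev_slot() are illustrative
stand-ins for rvt_size_atomic(&dev->rdi) and the open-coded ternary in
the function above.

#include <assert.h>
#include <stdio.h>

#define SIZE_ATOMIC 16 /* stand-in for rvt_size_atomic(&dev->rdi) */

/* Predecessor of a slot in the ack-queue ring; valid slots are
 * 0 .. SIZE_ATOMIC inclusive, so slot 0 wraps back to SIZE_ATOMIC.
 */
static unsigned int prev_slot(unsigned int tail)
{
	return tail == 0 ? SIZE_ATOMIC : tail - 1;
}

int main(void)
{
	assert(prev_slot(0) == SIZE_ATOMIC); /* wraps to the last slot */
	assert(prev_slot(5) == 4);
	printf("prev of 0 -> %u, prev of 5 -> %u\n",
	       prev_slot(0), prev_slot(5));
	return 0;
}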
