[PATCH for-next 15/17] IB/hfi1: Add interlock between a TID RDMA request and other requests

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Kaike Wan <kaike.wan@xxxxxxxxx>

This locking mechanism is designed to prevent various memory corruption
scenarios from occurring when requests are pipelined, especially when
RDMA READ/WRITE requests are interleaved with TID RDMA READ/WRITE
requests:
1. READ-AFTER-READ;
2. READ-AFTER-WRITE;
3. WRITE-AFTER-READ;
When memory corruption is likely, a request will be held back until
previous requests have been completed.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@xxxxxxxxx>
Signed-off-by: Mitko Haralanov <mitko.haralanov@xxxxxxxxx>
Signed-off-by: Kaike Wan <kaike.wan@xxxxxxxxx>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@xxxxxxxxx>
---
 drivers/infiniband/hw/hfi1/rc.c       |   16 ++++++++++++++
 drivers/infiniband/hw/hfi1/tid_rdma.c |   37 +++++++++++++++++++++++++++++++++
 drivers/infiniband/hw/hfi1/tid_rdma.h |   11 ++++++++++
 drivers/infiniband/hw/hfi1/verbs.h    |    3 +++
 4 files changed, 67 insertions(+), 0 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index a5aacf8..349751c 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -482,6 +482,15 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 		len = wqe->length;
 		ss = &qp->s_sge;
 		bth2 = mask_psn(qp->s_psn);
+
+		/*
+		 * Interlock between various IB requests and TID RDMA
+		 * if necessary.
+		 */
+		if ((priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) ||
+		    hfi1_tid_rdma_wqe_interlock(qp, wqe))
+			goto bail;
+
 		switch (wqe->wr.opcode) {
 		case IB_WR_SEND:
 		case IB_WR_SEND_WITH_IMM:
@@ -1321,6 +1330,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
 		qp->s_state = OP(SEND_LAST);
 	}
 done:
+	priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
 	qp->s_psn = psn;
 	/*
 	 * Set RVT_S_WAIT_PSN as rc_complete() may start the timer
@@ -1540,6 +1550,8 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
 				  struct rvt_swqe *wqe,
 				  struct hfi1_ibport *ibp)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
+
 	lockdep_assert_held(&qp->s_lock);
 	/*
 	 * Don't decrement refcount and don't generate a
@@ -1608,6 +1620,10 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
 			qp->s_draining = 0;
 		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	}
+	if (priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) {
+		priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
+		hfi1_schedule_send(qp);
+	}
 	return wqe;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index f4cda45..135667b 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -2825,3 +2825,40 @@ void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp)
 		} while (!ret);
 	}
 }
+
+/*
+ * Return true and set HFI1_S_TID_WAIT_INTERLCK when @wqe must be held back.
+ */
+bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+	struct rvt_swqe *prev;
+	struct hfi1_qp_priv *priv = qp->priv;
+	u32 s_prev;
+
+	s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
+	prev = rvt_get_swqe_ptr(qp, s_prev);
+
+	switch (wqe->wr.opcode) {
+	case IB_WR_SEND:
+	case IB_WR_SEND_WITH_IMM:
+	case IB_WR_SEND_WITH_INV:
+	case IB_WR_ATOMIC_CMP_AND_SWP:
+	case IB_WR_ATOMIC_FETCH_AND_ADD:
+	case IB_WR_RDMA_WRITE:
+	case IB_WR_RDMA_READ:
+		break;
+	case IB_WR_TID_RDMA_READ:
+		/* An RDMA READ ahead of us may still be unacked: wait. */
+		if (prev->wr.opcode == IB_WR_RDMA_READ &&
+		    qp->s_acked != qp->s_cur)
+			goto interlock;
+		break;
+	default:
+		break;
+	}
+	return false;
+
+interlock:
+	priv->s_flags |= HFI1_S_TID_WAIT_INTERLCK;
+	return true;
+}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 4f85b7e..689a549 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -17,6 +17,16 @@
 #define TID_RDMA_MAX_SEGMENT_SIZE       BIT(18)   /* 256 KiB (for now) */
 #define TID_RDMA_MAX_PAGES              (BIT(18) >> PAGE_SHIFT)
 
+/*
+ * Bit definitions for priv->s_flags.
+ * These bit flags overload the bit flags defined for the QP's s_flags.
+ * Because these bit fields are used only for the QP priv s_flags,
+ * there are no collisions.
+ *
+ * HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
+ */
+#define HFI1_S_TID_WAIT_INTERLCK  BIT(5)
+
 struct tid_rdma_params {
 	struct rcu_head rcu_head;
 	u32 qp;
@@ -210,5 +220,6 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
 void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
 			       u32 *bth2);
 void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp);
+bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe);
 
 #endif /* HFI1_TID_RDMA_H */
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 7642b59..841727a 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -171,6 +171,9 @@ struct hfi1_qp_priv {
 	u8 hdr_type; /* 9B or 16B */
 	unsigned long tid_timer_timeout_jiffies;
 
+	/* variables for the TID RDMA SE state machine */
+	u32 s_flags;
+
 	/* For TID RDMA READ */
 	u32 tid_r_reqs;         /* Num of tid reads requested */
 	u32 tid_r_comp;         /* Num of tid reads completed */




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux