From: Kaike Wan <kaike.wan@xxxxxxxxx>

This locking mechanism is designed to prevent various memory corruption
scenarios from occurring when requests are pipelined, especially when
RDMA WRITE requests are interleaved with TID RDMA READ requests:
1. READ-AFTER-READ;
2. READ-AFTER-WRITE;
3. WRITE-AFTER-READ;
4. WRITE-AFTER-WRITE.
When memory corruption is likely, a request will be held back until
previous requests have been completed.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@xxxxxxxxx>
Signed-off-by: Mitko Haralanov <mitko.haralanov@xxxxxxxxx>
Signed-off-by: Kaike Wan <kaike.wan@xxxxxxxxx>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@xxxxxxxxx>
---
 drivers/infiniband/hw/hfi1/rc.c       |  6 ++++
 drivers/infiniband/hw/hfi1/tid_rdma.c | 46 ++++++++++++++++++++++++++++++++-
 drivers/infiniband/hw/hfi1/tid_rdma.h |  9 ++++++
 3 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 6d2abea..cfb8633 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -173,6 +173,12 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 		}
 
 		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+		/* Check for tid write fence */
+		if ((qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK) ||
+		    hfi1_tid_rdma_ack_interlock(qp, e)) {
+			iowait_set_flag(&qpriv->s_iowait, IOWAIT_PENDING_IB);
+			goto bail;
+		}
 		if (e->opcode == OP(RDMA_READ_REQUEST)) {
 			/*
 			 * If a RDMA read response is being resent and
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index f786c2a..7ea8e21 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -2179,6 +2179,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
 			req->state = TID_REQUEST_RESEND;
 			req->cur_seg = req->comp_seg;
 		}
+		qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
 	}
 	/* Re-process old requests.*/
 	if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
@@ -3225,6 +3226,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
 	struct rvt_swqe *prev;
 	struct hfi1_qp_priv *priv = qp->priv;
 	u32 s_prev;
+	struct tid_rdma_request *req;
 
 	s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
 	prev = rvt_get_swqe_ptr(qp, s_prev);
@@ -3236,14 +3238,28 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
 	case IB_WR_ATOMIC_CMP_AND_SWP:
 	case IB_WR_ATOMIC_FETCH_AND_ADD:
 	case IB_WR_RDMA_WRITE:
+		switch (prev->wr.opcode) {
+		case IB_WR_TID_RDMA_WRITE:
+			req = wqe_to_tid_req(prev);
+			if (req->ack_seg != req->total_segs)
+				goto interlock;
+		default:
+			break;
+		}
 	case IB_WR_RDMA_READ:
-		break;
+		if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
+			break;
+		/* fall through */
 	case IB_WR_TID_RDMA_READ:
 		switch (prev->wr.opcode) {
 		case IB_WR_RDMA_READ:
 			if (qp->s_acked != qp->s_cur)
 				goto interlock;
 			break;
+		case IB_WR_TID_RDMA_WRITE:
+			req = wqe_to_tid_req(prev);
+			if (req->ack_seg != req->total_segs)
+				goto interlock;
 		default:
 			break;
 		}
@@ -5153,7 +5169,9 @@ static int make_tid_rdma_ack(struct rvt_qp *qp,
 		e = &qp->s_ack_queue[qpriv->r_tid_ack];
 		req = ack_to_tid_req(e);
 		flow = req->acked_tail;
-	}
+	} else if (req->ack_seg == req->total_segs &&
+		   qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK)
+		qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
 
 	hwords += hfi1_build_tid_rdma_write_ack(qp, e, ohdr, flow, &bth1,
 						&bth2);
@@ -5306,3 +5324,27 @@ bool hfi1_schedule_tid_send(struct rvt_qp *qp)
 			IOWAIT_PENDING_TID);
 	return false;
 }
+
+bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e)
+{
+	struct rvt_ack_entry *prev;
+	struct tid_rdma_request *req;
+	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
+	struct hfi1_qp_priv *priv = qp->priv;
+	u32 s_prev;
+
+	s_prev = qp->s_tail_ack_queue == 0 ? rvt_size_atomic(&dev->rdi) :
+		 (qp->s_tail_ack_queue - 1);
+	prev = &qp->s_ack_queue[s_prev];
+
+	if ((e->opcode == TID_OP(READ_REQ) ||
+	     e->opcode == OP(RDMA_READ_REQUEST)) &&
+	    prev->opcode == TID_OP(WRITE_REQ)) {
+		req = ack_to_tid_req(prev);
+		if (req->ack_seg != req->total_segs) {
+			priv->s_flags |= HFI1_R_TID_WAIT_INTERLCK;
+			return true;
+		}
+	}
+	return false;
+}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 7f8f17b..4446818 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -25,6 +25,7 @@
  * s_flags, there are no collisions.
  *
  * HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
+ * HFI1_R_TID_WAIT_INTERLCK - QP is waiting for responder interlock
  */
 #define HFI1_S_TID_BUSY_SET       BIT(0)
 /* BIT(1) reserved for RVT_S_BUSY. */
@@ -32,9 +33,15 @@
 /* BIT(3) reserved for RVT_S_RESP_PENDING. */
 /* BIT(4) reserved for RVT_S_ACK_PENDING. */
 #define HFI1_S_TID_WAIT_INTERLCK  BIT(5)
+#define HFI1_R_TID_WAIT_INTERLCK  BIT(6)
 /* BIT(7) - BIT(15) reserved for RVT_S_WAIT_*. */
+/* BIT(16) reserved for RVT_S_SEND_ONE */
 #define HFI1_S_TID_RETRY_TIMER    BIT(17)
+/* BIT(18) reserved for RVT_S_ECN. */
 #define HFI1_R_TID_SW_PSN         BIT(19)
+/* BIT(26) reserved for HFI1_S_WAIT_HALT */
+/* BIT(27) reserved for HFI1_S_WAIT_TID_RESP */
+/* BIT(28) reserved for HFI1_S_WAIT_TID_SPACE */
 
 /*
  * Unlike regular IB RDMA VERBS, which do not require an entry
@@ -309,4 +316,6 @@ u32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe,
 
 bool hfi1_schedule_tid_send(struct rvt_qp *qp);
 
+bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e);
+
 #endif /* HFI1_TID_RDMA_H */
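
[Not part of the patch] For readers following along, the rule that both
hfi1_tid_rdma_wqe_interlock() and the new hfi1_tid_rdma_ack_interlock()
apply reduces to: hold a new request back while the preceding TID RDMA
WRITE still has segments that have not been acknowledged
(req->ack_seg != req->total_segs). The snippet below is only a minimal,
self-contained sketch of that decision; the names fence_request and
must_fence are made-up stand-ins, not the driver's real types, and it
ignores the per-opcode differences the patch actually implements.

/*
 * Illustrative sketch only: fence a new request while the previous
 * TID RDMA WRITE still has unacked segments.  Stand-in types, not
 * the hfi1 driver's structures.
 */
#include <stdbool.h>
#include <stdio.h>

enum fence_opcode { FENCE_RDMA_READ, FENCE_RDMA_WRITE, FENCE_TID_RDMA_WRITE };

struct fence_request {
	enum fence_opcode opcode;
	unsigned int ack_seg;		/* segments acknowledged so far */
	unsigned int total_segs;	/* segments in the whole request */
};

/* Return true if @next must be held back behind @prev. */
static bool must_fence(const struct fence_request *prev,
		       const struct fence_request *next)
{
	(void)next;	/* simplified: every new opcode is fenced the same way */
	return prev->opcode == FENCE_TID_RDMA_WRITE &&
	       prev->ack_seg != prev->total_segs;
}

int main(void)
{
	struct fence_request write = {
		.opcode = FENCE_TID_RDMA_WRITE, .ack_seg = 2, .total_segs = 4,
	};
	struct fence_request read = { .opcode = FENCE_RDMA_READ };

	printf("fence read behind partially acked write: %d\n",
	       must_fence(&write, &read));	/* 1: hold the READ back */

	write.ack_seg = write.total_segs;
	printf("fence after write fully acked: %d\n",
	       must_fence(&write, &read));	/* 0: READ may proceed */
	return 0;
}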