From: Mitko Haralanov <mitko.haralanov@xxxxxxxxx> This commit adds the TID RDMA pointers to the opcode handlers. It also adds the TID RDMA header sizes to the header size table. The IB extended headers structure is expanded to include the TID RDMA-specific headers. Reviewed-by: Mike Marciniszyn <mike.marciniszyn@xxxxxxxxx> Signed-off-by: Mitko Haralanov <mitko.haralanov@xxxxxxxxx> Signed-off-by: Kaike Wan <kaike.wan@xxxxxxxxx> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@xxxxxxxxx> --- drivers/infiniband/hw/hfi1/driver.c | 58 +++++++++---- drivers/infiniband/hw/hfi1/hfi.h | 2 drivers/infiniband/hw/hfi1/tid_rdma.c | 15 +++ drivers/infiniband/hw/hfi1/tid_rdma.h | 8 ++ drivers/infiniband/hw/hfi1/verbs.c | 142 +++++++++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/verbs.h | 4 + include/rdma/ib_hdrs.h | 14 +++ 7 files changed, 220 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a41f855..ec18930 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1549,25 +1549,32 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) return -EINVAL; } -void handle_eflags(struct hfi1_packet *packet) +static void show_eflags_errs(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; u32 rte = rhf_rcv_type_err(packet->rhf); + dd_dev_err(rcd->dd, + "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n", + rcd->ctxt, packet->rhf, + packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "", + packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "", + packet->rhf & RHF_DC_ERR ? "dc " : "", + packet->rhf & RHF_TID_ERR ? "tid " : "", + packet->rhf & RHF_LEN_ERR ? "len " : "", + packet->rhf & RHF_ECC_ERR ? "ecc " : "", + packet->rhf & RHF_VCRC_ERR ? "vcrc " : "", + packet->rhf & RHF_ICRC_ERR ? 
"icrc " : "", + rte); +} + +void handle_eflags(struct hfi1_packet *packet) +{ + struct hfi1_ctxtdata *rcd = packet->rcd; + rcv_hdrerr(rcd, rcd->ppd, packet); if (rhf_err_flags(packet->rhf)) - dd_dev_err(rcd->dd, - "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n", - rcd->ctxt, packet->rhf, - packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "", - packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "", - packet->rhf & RHF_DC_ERR ? "dc " : "", - packet->rhf & RHF_TID_ERR ? "tid " : "", - packet->rhf & RHF_LEN_ERR ? "len " : "", - packet->rhf & RHF_ECC_ERR ? "ecc " : "", - packet->rhf & RHF_VCRC_ERR ? "vcrc " : "", - packet->rhf & RHF_ICRC_ERR ? "icrc " : "", - rte); + show_eflags_errs(packet); } /* @@ -1673,11 +1680,14 @@ static int kdeth_process_expected(struct hfi1_packet *packet) if (unlikely(hfi1_dbg_should_fault_rx(packet))) return RHF_RCV_CONTINUE; - if (unlikely(rhf_err_flags(packet->rhf))) - handle_eflags(packet); + if (unlikely(rhf_err_flags(packet->rhf))) { + struct hfi1_ctxtdata *rcd = packet->rcd; - dd_dev_err(packet->rcd->dd, - "Unhandled expected packet received. Dropping.\n"); + if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet)) + return RHF_RCV_CONTINUE; + } + + hfi1_kdeth_expected_rcv(packet); return RHF_RCV_CONTINUE; } @@ -1686,11 +1696,17 @@ static int kdeth_process_eager(struct hfi1_packet *packet) hfi1_setup_9B_packet(packet); if (unlikely(hfi1_dbg_should_fault_rx(packet))) return RHF_RCV_CONTINUE; - if (unlikely(rhf_err_flags(packet->rhf))) - handle_eflags(packet); - dd_dev_err(packet->rcd->dd, - "Unhandled eager packet received. 
Dropping.\n"); + trace_hfi1_rcvhdr(packet); + if (unlikely(rhf_err_flags(packet->rhf))) { + struct hfi1_ctxtdata *rcd = packet->rcd; + + show_eflags_errs(packet); + if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet)) + return RHF_RCV_CONTINUE; + } + + hfi1_kdeth_eager_rcv(packet); return RHF_RCV_CONTINUE; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 31dfc9a..2f3d045 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -2097,7 +2097,7 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK | #endif HFI1_PKT_USER_SC_INTEGRITY; - else + else if (ctxt_type != SC_KERNEL) base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY; /* turn on send-side job key checks if !A0 */ diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 0905c83..fcc1ab7 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -299,6 +299,21 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet) { } +void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet) +{ +} + +void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) +{ +} + +bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, + struct hfi1_pportdata *ppd, + struct hfi1_packet *packet) +{ + return true; +} + /** * qp_to_rcd - determine the receive context used by a qp * @qp - the qp diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h index 89acc4f..8797621 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.h +++ b/drivers/infiniband/hw/hfi1/tid_rdma.h @@ -271,6 +271,14 @@ struct trdma_flow_state { void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet); +void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet); + +void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet); + +bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, + struct 
hfi1_pportdata *ppd, + struct hfi1_packet *packet); + int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp, struct ib_qp_init_attr *init_attr); void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 4e6832d..4e7e885 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -47,6 +47,7 @@ #include <rdma/ib_mad.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_hdrs.h> #include <linux/io.h> #include <linux/module.h> #include <linux/utsname.h> @@ -349,6 +350,14 @@ static inline bool wss_exceeds_threshold(void) [IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28, [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4, [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4, + [IB_OPCODE_TID_RDMA_WRITE_REQ] = 12 + 8 + 36, + [IB_OPCODE_TID_RDMA_WRITE_RESP] = 12 + 8 + 36, + [IB_OPCODE_TID_RDMA_WRITE_DATA] = 12 + 8 + 36, + [IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = 12 + 8 + 36, + [IB_OPCODE_TID_RDMA_READ_REQ] = 12 + 8 + 36, + [IB_OPCODE_TID_RDMA_READ_RESP] = 12 + 8 + 36, + [IB_OPCODE_TID_RDMA_ACK] = 12 + 8 + 36, + [IB_OPCODE_TID_RDMA_RESYNC] = 12 + 8 + 36, /* UC */ [IB_OPCODE_UC_SEND_FIRST] = 12 + 8, [IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8, @@ -392,6 +401,21 @@ static inline bool wss_exceeds_threshold(void) [IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv, [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv, [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv, + + /* + * TID RDMA has separate handlers for different opcodes. + * However, TID RDMA opcodes can be handled by a single handler + * just like all the RC opcodes. 
+ */ + [IB_OPCODE_TID_RDMA_WRITE_REQ] = &hfi1_rc_rcv_tid_rdma_write_req, + [IB_OPCODE_TID_RDMA_WRITE_RESP] = &hfi1_rc_rcv_tid_rdma_write_resp, + [IB_OPCODE_TID_RDMA_WRITE_DATA] = &hfi1_rc_rcv_tid_rdma_write_data, + [IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = &hfi1_rc_rcv_tid_rdma_write_data, + [IB_OPCODE_TID_RDMA_READ_REQ] = &hfi1_rc_rcv_tid_rdma_read_req, + [IB_OPCODE_TID_RDMA_READ_RESP] = &hfi1_rc_rcv_tid_rdma_read_resp, + [IB_OPCODE_TID_RDMA_RESYNC] = &hfi1_rc_rcv_tid_rdma_resync, + [IB_OPCODE_TID_RDMA_ACK] = &hfi1_rc_rcv_tid_rdma_ack, + /* UC */ [IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv, [IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv, @@ -556,6 +580,124 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc) return pbc; } +static opcode_handler tid_qp_ok(int opcode, struct hfi1_packet *packet) +{ + if (packet->qp->ibqp.qp_type != IB_QPT_RC || + !(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) + return NULL; + if ((opcode & RVT_OPCODE_QP_MASK) == IB_OPCODE_TID_RDMA) + return opcode_handler_tbl[opcode]; + return NULL; +} + +void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet) +{ + struct hfi1_ctxtdata *rcd = packet->rcd; + struct ib_header *hdr = packet->hdr; + u32 tlen = packet->tlen; + struct hfi1_pportdata *ppd = rcd->ppd; + struct hfi1_ibport *ibp = &ppd->ibport_data; + struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; + opcode_handler opcode_handler; + unsigned long flags; + u32 qp_num; + int lnh; + u8 opcode; + + /* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */ + if (unlikely(tlen < 15 * sizeof(u32))) + goto drop; + + lnh = be16_to_cpu(hdr->lrh[0]) & 3; + if (lnh != HFI1_LRH_BTH) + goto drop; + + packet->ohdr = &hdr->u.oth; + trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf))); + + opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24); + inc_opstats(tlen, &rcd->opstats->stats[opcode]); + + /* verbs_qp can be picked up from any tid_rdma header struct */ + qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.w_data.verbs_qp) & + 
RVT_QPN_MASK; + + rcu_read_lock(); + packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); + if (!packet->qp) + goto drop_rcu; + spin_lock_irqsave(&packet->qp->r_lock, flags); + opcode_handler = tid_qp_ok(opcode, packet); + if (likely(opcode_handler)) + opcode_handler(packet); + else + goto drop_unlock; + spin_unlock_irqrestore(&packet->qp->r_lock, flags); + rcu_read_unlock(); + + return; +drop_unlock: + spin_unlock_irqrestore(&packet->qp->r_lock, flags); +drop_rcu: + rcu_read_unlock(); +drop: + ibp->rvp.n_pkt_drops++; +} + +void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet) +{ + struct hfi1_ctxtdata *rcd = packet->rcd; + struct ib_header *hdr = packet->hdr; + u32 tlen = packet->tlen; + struct hfi1_pportdata *ppd = rcd->ppd; + struct hfi1_ibport *ibp = &ppd->ibport_data; + struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; + opcode_handler opcode_handler; + unsigned long flags; + u32 qp_num; + int lnh; + u8 opcode; + + /* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */ + if (unlikely(tlen < 15 * sizeof(u32))) + goto drop; + + lnh = be16_to_cpu(hdr->lrh[0]) & 3; + if (lnh != HFI1_LRH_BTH) + goto drop; + + packet->ohdr = &hdr->u.oth; + trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf))); + + opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24); + inc_opstats(tlen, &rcd->opstats->stats[opcode]); + + /* verbs_qp can be picked up from any tid_rdma header struct */ + qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.w_data.verbs_qp) & + RVT_QPN_MASK; + + rcu_read_lock(); + packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); + if (!packet->qp) + goto drop_rcu; + spin_lock_irqsave(&packet->qp->r_lock, flags); + opcode_handler = tid_qp_ok(opcode, packet); + if (likely(opcode_handler)) + opcode_handler(packet); + else + goto drop_unlock; + spin_unlock_irqrestore(&packet->qp->r_lock, flags); + rcu_read_unlock(); + + return; +drop_unlock: + spin_unlock_irqrestore(&packet->qp->r_lock, flags); +drop_rcu: + rcu_read_unlock(); +drop: + ibp->rvp.n_pkt_drops++; +} + 
static int hfi1_do_pkey_check(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 7f95e43..e308ebd 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -408,6 +408,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, void hfi1_unregister_ib_device(struct hfi1_devdata *); +void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet); + +void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet); + void hfi1_ib_rcv(struct hfi1_packet *packet); void hfi1_16B_rcv(struct hfi1_packet *packet); diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h index 6e35416..de4f4a0 100644 --- a/include/rdma/ib_hdrs.h +++ b/include/rdma/ib_hdrs.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -100,6 +100,8 @@ struct ib_atomic_eth { __be64 compare_data; /* potentially unaligned */ } __packed; +#include <rdma/tid_rdma_defs.h> + union ib_ehdrs { struct { __be32 deth[2]; @@ -117,6 +119,16 @@ struct ib_atomic_eth { __be32 aeth; __be32 ieth; struct ib_atomic_eth atomic_eth; + /* TID RDMA headers */ + union { + struct tid_rdma_write_req w_req; + struct tid_rdma_write_resp w_rsp; + struct tid_rdma_write_data w_data; + struct tid_rdma_read_req r_req; + struct tid_rdma_read_resp r_rsp; + struct tid_rdma_resync resync; + struct tid_rdma_ack ack; + } tid_rdma; } __packed; struct ib_other_headers { -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html