From: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx> The post_send() path determines if it should post directly or, schedule the post for later. The current logic is: if the swqe ring is empty or (for hfi1) wqe->length <= piothreshold post the send else schedule This can allow large requests to call the send engine directly. Large requests can potentially produce a large number of packets prior to returning to the caller, blocking the caller from posting more requests, and allowing better parallel processing. Allow the driver(s) more say in this logic (pass call_send to the driver, rather than examining a return value). Update hfi1/qib logic to schedule the send engine if an RC or UC message is larger than the QP MTU size. Reviewed-by: Mike Marciniszyn <mike.marciniszyn@xxxxxxxxx> Reviewed-by: Ira Weiny <ira.weiny@xxxxxxxxx> Signed-off-by: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@xxxxxxxxx> --- drivers/infiniband/hw/hfi1/qp.c | 12 +++++------- drivers/infiniband/hw/hfi1/verbs.h | 3 ++- drivers/infiniband/hw/qib/qib_qp.c | 17 +++++++---------- drivers/infiniband/hw/qib/qib_verbs.h | 3 ++- drivers/infiniband/sw/rdmavt/qp.c | 14 ++++++++------ include/rdma/rdma_vt.h | 10 ++++++++-- 6 files changed, 32 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 352fef6..17ccd47 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -285,17 +285,13 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, * hfi1_check_send_wqe - validate wqe * @qp - The qp * @wqe - The built wqe - * - * validate wqe. This is called - * prior to inserting the wqe into - * the ring but after the wqe has been - * setup. + * @call_send - Determine if the send should be posted or scheduled. * * Returns 0 on success, -EINVAL on failure * */ int hfi1_check_send_wqe(struct rvt_qp *qp, - struct rvt_swqe *wqe) + struct rvt_swqe *wqe, bool *call_send) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct rvt_ah *ah; @@ -305,6 +301,8 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, case IB_QPT_UC: if (wqe->length > 0x80000000U) return -EINVAL; + if (wqe->length > qp->pmtu) + *call_send = false; break; case IB_QPT_SMI: ah = ibah_to_rvtah(wqe->ud_wr.ah); @@ -321,7 +319,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, default: break; } - return wqe->length <= piothreshold; + return 0; } /** diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index a4d0650..269ec33 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -343,7 +343,8 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait); -int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); +int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, + bool *call_send); extern const u32 rc_only_opcode; extern const u32 uc_only_opcode; diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 344e401..a81905d 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -378,25 +378,22 @@ void qib_flush_qp_waiters(struct rvt_qp *qp) * qib_check_send_wqe - validate wr/wqe * @qp - The qp * @wqe - The built wqe + * @call_send - Determine if the send should be posted or scheduled * - * validate wr/wqe. This is called - * prior to inserting the wqe into - * the ring but after the wqe has been - * setup. - * - * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure + * Returns 0 on success, -EINVAL on failure */ int qib_check_send_wqe(struct rvt_qp *qp, - struct rvt_swqe *wqe) + struct rvt_swqe *wqe, bool *call_send) { struct rvt_ah *ah; - int ret = 0; switch (qp->ibqp.qp_type) { case IB_QPT_RC: case IB_QPT_UC: if (wqe->length > 0x80000000U) return -EINVAL; + if (wqe->length > qp->pmtu) + *call_send = false; break; case IB_QPT_SMI: case IB_QPT_GSI: @@ -405,12 +402,12 @@ int qib_check_send_wqe(struct rvt_qp *qp, if (wqe->length > (1 << ah->log_pmtu)) return -EINVAL; /* progress hint */ - ret = 1; + *call_send = true; break; default: break; } - return ret; + return 0; } #ifdef CONFIG_DEBUG_FS diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 666613e..3d7b744 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -303,7 +303,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); -int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); +int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, + bool *call_send); struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index e2cc827..8da0ebc 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1718,7 +1718,7 @@ static inline int rvt_qp_is_avail( */ static int rvt_post_one_wr(struct rvt_qp *qp, const struct ib_send_wr *wr, - int *call_send) + bool *call_send) { struct rvt_swqe *wqe; u32 next; @@ -1825,11 +1825,9 @@ static int rvt_post_one_wr(struct rvt_qp *qp, /* general part of wqe valid - allow for driver checks */ if (rdi->driver_f.check_send_wqe) { - ret = rdi->driver_f.check_send_wqe(qp, wqe); + ret = rdi->driver_f.check_send_wqe(qp, wqe, call_send); if (ret < 0) goto bail_inval_free; - if (ret) - *call_send = ret; } log_pmtu = qp->log_pmtu; @@ -1897,7 +1895,7 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); unsigned long flags = 0; - int call_send; + bool call_send; unsigned nreq = 0; int err = 0; @@ -1930,7 +1928,11 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, bail: spin_unlock_irqrestore(&qp->s_hlock, flags); if (nreq) { - if (call_send) + /* + * Only call do_send if there is exactly one packet, and the + * driver said it was ok. + */ + if (nreq == 1 && call_send) rdi->driver_f.do_send(qp); else rdi->driver_f.schedule_send_no_lock(qp); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index e79229a..e32facd 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -214,8 +214,14 @@ struct rvt_driver_provided { void (*schedule_send)(struct rvt_qp *qp); void (*schedule_send_no_lock)(struct rvt_qp *qp); - /* Driver specific work request checking */ - int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe); + /* + * Validate the wqe. This needs to be done prior to inserting the + * wqe into the ring, but after the wqe has been set up. Allow for + * driver specific work request checking by providing a callback. + * call_send indicates if the wqe should be posted or scheduled. + */ + int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe, + bool *call_send); /* * Sometimes rdmavt needs to kick the driver's send progress. That is