Added code to implement the path to rxe_bind_mw which is still a stub. Now the ibv_bind_mw verb can be called. Added bind_mw work requests to the opcodes file and added the new local operation to rxe_req. Changed WR_REG_MASK to WR_LOCAL_MASK since it is used to identify local operations. Signed-off-by: Bob Pearson <rpearson@xxxxxxx> --- drivers/infiniband/sw/rxe/rxe_comp.c | 3 + drivers/infiniband/sw/rxe/rxe_loc.h | 1 + drivers/infiniband/sw/rxe/rxe_mw.c | 6 ++ drivers/infiniband/sw/rxe/rxe_opcode.c | 11 +++- drivers/infiniband/sw/rxe/rxe_req.c | 76 +++++++++++++++++++++----- drivers/infiniband/sw/rxe/rxe_task.h | 2 + 6 files changed, 84 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index b9b8c4e115f4..caa8ad990337 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -130,6 +130,7 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) case IB_WR_RDMA_READ_WITH_INV: return IB_WC_RDMA_READ; case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; case IB_WR_REG_MR: return IB_WC_REG_MR; + case IB_WR_BIND_MW: return IB_WC_BIND_MW; default: return 0xff; @@ -787,6 +788,8 @@ int rxe_completer(void *arg) */ WARN_ON_ONCE(skb); rxe_drop_ref(qp); + // TODO this seems plain backwards + // EAGAIN normally means call me again return -EAGAIN; done: diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 42375af68f48..02df9bf76d1a 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -140,6 +140,7 @@ int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); struct ib_mw *rxe_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type, struct ib_udata *udata); int rxe_dealloc_mw(struct ib_mw *ibmw); +int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe); /* rxe_net.c */ void rxe_loopback(struct sk_buff *skb); diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index 50cd451751b8..230263c6d3e5 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -96,3 +96,9 @@ int rxe_dealloc_mw(struct ib_mw *ibmw) return 0; } + +int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + pr_err("rxe_bind_mw: not implemented\n"); + return -ENOSYS; +} diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c index 4cf11063e0b5..d2f2092f0be5 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.c +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -114,13 +114,20 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { [IB_WR_LOCAL_INV] = { .name = "IB_WR_LOCAL_INV", .mask = { - [IB_QPT_RC] = WR_REG_MASK, + [IB_QPT_RC] = WR_LOCAL_MASK, }, }, [IB_WR_REG_MR] = { .name = "IB_WR_REG_MR", .mask = { - [IB_QPT_RC] = WR_REG_MASK, + [IB_QPT_RC] = WR_LOCAL_MASK, + }, + }, + [IB_WR_BIND_MW] = { + .name = "IB_WR_BIND_MW", + .mask = { + [IB_QPT_RC] = WR_LOCAL_MASK, + [IB_QPT_UC] = WR_LOCAL_MASK, }, }, }; diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index c566372eebf8..b402eb82b402 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -586,6 +586,8 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, int rxe_requester(void *arg) { struct rxe_qp *qp = (struct rxe_qp *)arg; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_mr *rmr; struct rxe_pkt_info pkt; struct sk_buff *skb; struct rxe_send_wqe *wqe; @@ -596,9 +598,17 @@ int rxe_requester(void *arg) int ret; struct rxe_send_wqe rollback_wqe; u32 rollback_psn; + int entered; rxe_add_ref(qp); + // this code is 'guaranteed' to never be entered more + // than once. Check to make sure that this is the case + entered = atomic_inc_return(&qp->req.task.entered); + if (entered > 1) { + pr_err("rxe_requester: entered %d times\n", entered); + } + next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) goto exit; @@ -621,13 +631,11 @@ int rxe_requester(void *arg) if (unlikely(!wqe)) goto exit; - if (wqe->mask & WR_REG_MASK) { - if (wqe->wr.opcode == IB_WR_LOCAL_INV) { - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - struct rxe_mr *rmr; - + if (wqe->mask & WR_LOCAL_MASK) { + switch (wqe->wr.opcode) { + case IB_WR_LOCAL_INV: rmr = rxe_pool_get_index(&rxe->mr_pool, - wqe->wr.ex.invalidate_rkey >> 8); + wqe->wr.ex.invalidate_rkey >> 8); if (!rmr) { pr_err("No mr for key %#x\n", wqe->wr.ex.invalidate_rkey); @@ -635,13 +643,16 @@ int rxe_requester(void *arg) wqe->status = IB_WC_MW_BIND_ERR; goto exit; } + // TODO this can race with external access + // to the MR in rxe_resp unless you can know + // that all accesses are done rmr->state = RXE_MEM_STATE_FREE; rxe_drop_ref(rmr); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; - } else if (wqe->wr.opcode == IB_WR_REG_MR) { - struct rxe_mr *rmr = to_rmr(wqe->wr.wr.reg.mr); - + break; + case IB_WR_REG_MR: + rmr = to_rmr(wqe->wr.wr.reg.mr); rmr->state = RXE_MEM_STATE_VALID; rmr->access = wqe->wr.wr.reg.access; rmr->lkey = wqe->wr.wr.reg.key; @@ -649,7 +660,21 @@ int rxe_requester(void *arg) rmr->iova = wqe->wr.wr.reg.mr->iova; wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; - } else { + break; + case IB_WR_BIND_MW: + ret = rxe_bind_mw(qp, wqe); + if (ret) { + wqe->state = wqe_state_done; + wqe->status = IB_WC_MW_BIND_ERR; + // TODO err: will change status + // probably should not + goto err; + } + wqe->state = wqe_state_done; + wqe->status = IB_WC_SUCCESS; + break; + default: + pr_err("rxe_requester: unexpected LOCAL WR opcode = %d\n", wqe->wr.opcode); goto exit; } if ((wqe->wr.send_flags & IB_SEND_SIGNALED) || @@ -704,9 +729,10 @@ int rxe_requester(void *arg) qp->req.wqe_index); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; + // TODO why?? why not just treat the same as a + // successful wqe and go to next wqe? __rxe_do_task(&qp->comp.task); - rxe_drop_ref(qp); - return 0; + goto again; } payload = mtu; } @@ -750,12 +776,36 @@ int rxe_requester(void *arg) goto next_wqe; + // TODO this can be cleaned up err: + /* we come here if an error occured while processing + * a send wqe. The completer will put the qp in error + * state and no more wqes will be processed unless + * the qp is cleaned up and restarted. We do not want + * to be called again */ wqe->status = IB_WC_LOC_PROT_ERR; wqe->state = wqe_state_error; __rxe_do_task(&qp->comp.task); + ret = -EAGAIN; + goto done; exit: + /* we come here if either there are no more wqes in the send + * queue or we are blocked waiting for some resource or event. + * The current wqe will be restarted or new wqe started when + * there is work to do. */ + ret = -EAGAIN; + goto done; + +again: + /* we come here if we are done with the current wqe but want to + * get called again. Mostly we loop back to next wqe so should + * be all one way or the other */ + ret = 0; + goto done; + +done: + atomic_dec(&qp->req.task.entered); rxe_drop_ref(qp); - return -EAGAIN; + return ret; } diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index 08ff42d451c6..e33806c6f5a4 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -55,6 +55,8 @@ struct rxe_task { int ret; char name[16]; bool destroyed; + // debug code, delete me when done + atomic_t entered; }; /* -- 2.25.1