On 5/16/19 11:29 AM, Jason Gunthorpe wrote:
> On Thu, May 16, 2019 at 06:24:48PM +0000, Adit Ranadive wrote:
>> From: Bryan Tan <bryantan@xxxxxxxxxx>
>>
>> This change allows the RDMA stack to use physical resource numbers if
>> they are passed up from the device. Doing so allows communication with
>> physical non-ESX endpoints (such as a bare-metal Linux machine or a
>> SR-IOV-enabled VM).
>>
>> This is accomplished by separating the concept of the QP number from
>> the QP handle. Previously, the two were the same, as the QP number was
>> exposed to the guest and also used to reference a virtual QP in the
>> device backend. With physical resource numbers exposed, the QP number
>> given to the guest is the QP number assigned to the physical HCA's QP,
>> while the QP handle is still the internal handle used to reference a
>> virtual QP. Regardless of whether the device is exposing physical ids,
>> the driver will still try to pick up the QP handle from the backend if
>> possible. The MR keys exposed to the guest will also be the MR keys
>> created by the physical HCA, instead of virtual MR keys.
>>
>> A new version of the create QP response has been added to the device
>> API. The device backend will pass the QP handle up to the driver, if
>> both the device and driver are at the appropriate version, and the ABI
>> has also been updated to allow the return of the QP handle to the guest
>> library. The PVRDMA version and ABI version have been bumped up because
>> of these non-compatible changes.
>>
>> Reviewed-by: Jorgen Hansen <jhansen@xxxxxxxxxx>
>> Signed-off-by: Adit Ranadive <aditr@xxxxxxxxxx>
>> Signed-off-by: Bryan Tan <bryantan@xxxxxxxxxx>
>> ---
>>  drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 15 +++++++++++++-
>>  drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c    |  8 +++++++-
>>  drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c      | 24 +++++++++++++++++++++--
>>  include/uapi/rdma/vmw_pvrdma-abi.h                |  9 ++++++++-
>>  4 files changed, 51 insertions(+), 5 deletions(-)
>> ---
>>
>> The PR for userspace was sent:
>> https://github.com/linux-rdma/rdma-core/pull/531
>>
>> ---
>> diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
>> index 8f9749d54688..86a6c054ea26 100644
>> --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
>> +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
>> @@ -58,7 +58,8 @@
>>  #define PVRDMA_ROCEV1_VERSION		17
>>  #define PVRDMA_ROCEV2_VERSION		18
>>  #define PVRDMA_PPN64_VERSION		19
>> -#define PVRDMA_VERSION			PVRDMA_PPN64_VERSION
>> +#define PVRDMA_QPHANDLE_VERSION		20
>> +#define PVRDMA_VERSION			PVRDMA_QPHANDLE_VERSION
>>
>>  #define PVRDMA_BOARD_ID			1
>>  #define PVRDMA_REV_ID			1
>> @@ -581,6 +582,17 @@ struct pvrdma_cmd_create_qp_resp {
>>  	u32 max_inline_data;
>>  };
>>
>> +struct pvrdma_cmd_create_qp_resp_v2 {
>> +	struct pvrdma_cmd_resp_hdr hdr;
>> +	u32 qpn;
>> +	u32 qp_handle;
>> +	u32 max_send_wr;
>> +	u32 max_recv_wr;
>> +	u32 max_send_sge;
>> +	u32 max_recv_sge;
>> +	u32 max_inline_data;
>> +};
>> +
>>  struct pvrdma_cmd_modify_qp {
>>  	struct pvrdma_cmd_hdr hdr;
>>  	u32 qp_handle;
>> @@ -663,6 +675,7 @@ struct pvrdma_cmd_destroy_bind {
>>  	struct pvrdma_cmd_create_cq_resp create_cq_resp;
>>  	struct pvrdma_cmd_resize_cq_resp resize_cq_resp;
>>  	struct pvrdma_cmd_create_qp_resp create_qp_resp;
>> +	struct pvrdma_cmd_create_qp_resp_v2 create_qp_resp_v2;
>>  	struct pvrdma_cmd_query_qp_resp query_qp_resp;
>>  	struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp;
>>  	struct pvrdma_cmd_create_srq_resp create_srq_resp;
>> diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
>> index 40182297f87f..02e337837a2e 100644
>> --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
>> +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
>> @@ -201,7 +201,13 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
>>  	dev->ib_dev.owner = THIS_MODULE;
>>  	dev->ib_dev.num_comp_vectors = 1;
>>  	dev->ib_dev.dev.parent = &dev->pdev->dev;
>> -	dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION;
>> +
>> +	if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION)
>> +		dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION;
>> +	else
>> +		dev->ib_dev.uverbs_abi_ver =
>> +			PVRDMA_UVERBS_NO_QP_HANDLE_ABI_VERSION;
>> +
>>  	dev->ib_dev.uverbs_cmd_mask =
>>  		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
>>  		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
>> diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
>> index 0eaaead5baec..8cba7623f379 100644
>> --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
>> +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
>> @@ -195,7 +195,9 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
>>  	union pvrdma_cmd_resp rsp;
>>  	struct pvrdma_cmd_create_qp *cmd = &req.create_qp;
>>  	struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp;
>> +	struct pvrdma_cmd_create_qp_resp_v2 *resp_v2 = &rsp.create_qp_resp_v2;
>>  	struct pvrdma_create_qp ucmd;
>> +	struct pvrdma_create_qp_resp qp_resp = {};
>>  	unsigned long flags;
>>  	int ret;
>>  	bool is_srq = !!init_attr->srq;
>> @@ -379,13 +381,31 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
>>  	}
>>
>>  	/* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */
>> -	qp->qp_handle = resp->qpn;
>>  	qp->port = init_attr->port_num;
>> -	qp->ibqp.qp_num = resp->qpn;
>> +	if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION) {
>> +		qp->ibqp.qp_num = resp_v2->qpn;
>> +		qp->qp_handle = resp_v2->qp_handle;
>> +	} else {
>> +		qp->ibqp.qp_num = resp->qpn;
>> +		qp->qp_handle = resp->qpn;
>> +	}
>> +
>>  	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
>>  	dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp;
>>  	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
>>
>> +	if (!qp->is_kernel) {
>> +		/* Copy udata back. */
>> +		qp_resp.qpn = qp->ibqp.qp_num;
>> +		qp_resp.qp_handle = qp->qp_handle;
>> +		if (ib_copy_to_udata(udata, &qp_resp, sizeof(qp_resp))) {
>> +			dev_warn(&dev->pdev->dev,
>> +				 "failed to copy back udata\n");
>> +			pvrdma_destroy_qp(&qp->ibqp, udata);
>> +			return ERR_PTR(-EINVAL);
>> +		}
>> +	}
>> +
>>  	return &qp->ibqp;
>>
>> err_pdir:
>> diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h
>> index 6e73f0274e41..8ebab11dadcb 100644
>> --- a/include/uapi/rdma/vmw_pvrdma-abi.h
>> +++ b/include/uapi/rdma/vmw_pvrdma-abi.h
>> @@ -49,7 +49,9 @@
>>
>>  #include <linux/types.h>
>>
>> -#define PVRDMA_UVERBS_ABI_VERSION	3		/* ABI Version. */
>> +#define PVRDMA_UVERBS_NO_QP_HANDLE_ABI_VERSION	3
>> +#define PVRDMA_UVERBS_ABI_VERSION	4		/* ABI Version. */
>
> Don't mess with ABI version when all you are doing is making the
> response or request struct longer.

Hmm, I thought we always had to update the ABI in case of such changes
as well?
Previously, we weren't copying a QP response back to udata at all, so
this adds a new response to user-space. Also, wouldn't an older
rdma-core break because of this, since it assumes the qpn reported is
the virtual one? I guess that's already a bug from a backward-compat
perspective.

> Jason
>
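
To make the compat question concrete, here is a rough sketch of how a
provider would have to pick the QP number versus the handle based on the
ABI version the kernel reports. This is not the actual rdma-core change
from the PR above; the struct layout and function names below are made
up for illustration only:

#include <stdint.h>

#define PVRDMA_UVERBS_NO_QP_HANDLE_ABI_VERSION	3
#define PVRDMA_UVERBS_ABI_VERSION		4

/* Made-up stand-in for the udata layout a v4 kernel copies back. */
struct pvrdma_create_qp_resp_sketch {
	uint32_t qpn;		/* physical HCA's QP number, wire visible */
	uint32_t qp_handle;	/* device-internal handle for later verbs */
};

/* Made-up provider-side QP bookkeeping. */
struct pvrdma_qp_sketch {
	uint32_t qp_num;	/* what the application puts on the wire */
	uint32_t qp_handle;	/* what modify/destroy must reference */
};

static void pvrdma_fill_qp_ids(int abi_version, uint32_t uverbs_qpn,
			       const struct pvrdma_create_qp_resp_sketch *resp,
			       struct pvrdma_qp_sketch *qp)
{
	if (abi_version >= PVRDMA_UVERBS_ABI_VERSION) {
		/* New kernel: qpn and qp_handle may now differ. */
		qp->qp_num = resp->qpn;
		qp->qp_handle = resp->qp_handle;
	} else {
		/* Old kernel (ABI 3): no extra response, handle == qpn. */
		qp->qp_num = uverbs_qpn;
		qp->qp_handle = uverbs_qpn;
	}
}

An old rdma-core that never sees the extra response would keep treating
the uverbs-reported qpn as both values, which is exactly where the
compat concern comes from.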