Add support for the loopback driver, which does most of its work in the kernel. Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx> --- CMakeLists.txt | 1 + kernel-headers/rdma/rdma_user_ioctl_cmds.h | 1 + providers/loopback/CMakeLists.txt | 3 + providers/loopback/loopback.c | 429 +++++++++++++++++++++++++++++ providers/loopback/loopback.h | 104 +++++++ 5 files changed, 538 insertions(+) create mode 100644 providers/loopback/CMakeLists.txt create mode 100644 providers/loopback/loopback.c create mode 100644 providers/loopback/loopback.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b2d8d89..213a387 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -596,6 +596,7 @@ add_subdirectory(providers/hfi1verbs) add_subdirectory(providers/ipathverbs) add_subdirectory(providers/rxe) add_subdirectory(providers/rxe/man) +add_subdirectory(providers/loopback) if (CYTHON_EXECUTABLE) add_subdirectory(pyverbs) diff --git a/kernel-headers/rdma/rdma_user_ioctl_cmds.h b/kernel-headers/rdma/rdma_user_ioctl_cmds.h index 06c34d9..f9756a2 100644 --- a/kernel-headers/rdma/rdma_user_ioctl_cmds.h +++ b/kernel-headers/rdma/rdma_user_ioctl_cmds.h @@ -102,6 +102,7 @@ enum rdma_driver_id { RDMA_DRIVER_RXE, RDMA_DRIVER_HFI1, RDMA_DRIVER_QIB, + RDMA_DRIVER_LOOPBACK, }; #endif diff --git a/providers/loopback/CMakeLists.txt b/providers/loopback/CMakeLists.txt new file mode 100644 index 0000000..9a86852 --- /dev/null +++ b/providers/loopback/CMakeLists.txt @@ -0,0 +1,3 @@ +rdma_provider(rdma_loopback + loopback.c + ) diff --git a/providers/loopback/loopback.c b/providers/loopback/loopback.c new file mode 100644 index 0000000..b636a57 --- /dev/null +++ b/providers/loopback/loopback.c @@ -0,0 +1,429 @@ +/* + * Copyright (c) 2018-19 Mellanox Technologies Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include <pthread.h>
+#include <stddef.h>
+
+#include <infiniband/driver.h>
+#include <infiniband/verbs.h>
+
+#include "loopback.h"
+
+/*
+ * Match table referenced by loopback_dev_ops.match_table.  The single
+ * entry matches on the name "lo".
+ * NOTE(review): presumably a device-name prefix match - confirm against
+ * the VERBS_NAME_MATCH definition.
+ */
+static const struct verbs_match_ent hca_table[] = {
+	VERBS_NAME_MATCH("lo", NULL),
+	{},
+};
+
+/*
+ * Query the device attributes through the kernel and render the raw
+ * 64-bit firmware version as "major.minor.sub_minor" in attr->fw_ver.
+ *
+ * Returns 0 on success, or the non-zero value returned by
+ * ibv_cmd_query_device().
+ */
+static int loopback_query_device(struct ibv_context *context,
+				 struct ibv_device_attr *attr)
+{
+	struct ibv_query_device cmd;
+	uint64_t raw_fw_ver;
+	unsigned major, minor, sub_minor;
+	int ret;
+
+	ret = ibv_cmd_query_device(context, attr, &raw_fw_ver,
+				   &cmd, sizeof(cmd));
+	if (ret)
+		return ret;
+
+	/* Three 16-bit fields packed into the low 48 bits. */
+	major = (raw_fw_ver >> 32) & 0xffff;
+	minor = (raw_fw_ver >> 16) & 0xffff;
+	sub_minor = raw_fw_ver & 0xffff;
+
+	/* The fields are unsigned, so they are printed with %u, not %d. */
+	snprintf(attr->fw_ver, sizeof attr->fw_ver,
+		 "%u.%u.%u", major, minor, sub_minor);
+	return 0;
+}
+
+/* Query port attributes; returns the result of ibv_cmd_query_port(). */
+static int loopback_query_port(struct ibv_context *context, uint8_t port,
+			       struct ibv_port_attr *attr)
+{
+	struct ibv_query_port cmd;
+
+	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
+}
+
+/*
+ * Allocate a protection domain.
+ *
+ * Returns the new PD, or NULL if either the allocation or the kernel
+ * command fails.
+ */
+static struct ibv_pd *loopback_alloc_pd(struct ibv_context *context)
+{
+	struct ib_uverbs_alloc_pd_resp resp;
+	struct ibv_alloc_pd cmd;
+	struct ibv_pd *pd;
+
+	pd = malloc(sizeof(*pd));
+	if (!pd)
+		return NULL;
+
+	if (ibv_cmd_alloc_pd(context, pd, &cmd, sizeof(cmd),
+			     &resp, sizeof(resp))) {
+		free(pd);
+		return NULL;
+	}
+
+	return pd;
+}
+
+/*
+ * Release a protection domain.  The memory is freed only when the
+ * kernel command succeeds; otherwise its error value is returned and
+ * the PD is left intact.
+ */
+static int loopback_dealloc_pd(struct ibv_pd *pd)
+{
+	int ret;
+
+	ret = ibv_cmd_dealloc_pd(pd);
+	if (!ret)
+		free(pd);
+
+	return ret;
+}
+
+/*
+ * Register a memory region of `length` bytes at `addr`; the address is
+ * also passed as the region's hca_va.
+ *
+ * Returns the new MR, or NULL on allocation or command failure.
+ */
+static struct ibv_mr *loopback_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+				      int access)
+{
+	struct ib_uverbs_reg_mr_resp resp;
+	struct ibv_reg_mr cmd;
+	struct verbs_mr *vmr;
+	int ret;
+
+	vmr = malloc(sizeof(*vmr));
+	if (!vmr)
+		return NULL;
+
+	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, vmr,
+			     &cmd, sizeof(cmd), &resp, sizeof(resp));
+	if
(ret) {
+		free(vmr);
+		return NULL;
+	}
+
+	return &vmr->ibv_mr;
+}
+
+/*
+ * Deregister a memory region.  The verbs_mr is freed only when the
+ * kernel command succeeds; otherwise its error value is returned.
+ */
+static int loopback_dereg_mr(struct verbs_mr *vmr)
+{
+	int ret;
+
+	ret = ibv_cmd_dereg_mr(vmr);
+	if (ret)
+		return ret;
+
+	free(vmr);
+	return 0;
+}
+
+/*
+ * Create a completion queue.
+ *
+ * Returns the new CQ, or NULL if the allocation, the spinlock
+ * initialisation or the kernel command fails.  The spinlock is torn
+ * down again on the command-failure path so nothing is leaked.
+ */
+static struct ibv_cq *
+loopback_create_cq(struct ibv_context *context, int cqes,
+		   struct ibv_comp_channel *channel,
+		   int comp_vector)
+{
+	struct ib_uverbs_create_cq_resp resp;
+	struct loopback_cq *cq;
+
+	/* calloc() gives zeroed counters without a separate memset(). */
+	cq = calloc(1, sizeof(*cq));
+	if (!cq)
+		return NULL;
+
+	if (pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE))
+		goto err_free;
+
+	if (ibv_cmd_create_cq(context, cqes, channel, comp_vector,
+			      &cq->ibv_cq, NULL, 0,
+			      &resp, sizeof(resp)))
+		goto err_lock;
+
+	return &cq->ibv_cq;
+
+err_lock:
+	pthread_spin_destroy(&cq->lock);
+err_free:
+	free(cq);
+	return NULL;
+}
+
+/*
+ * Destroy a completion queue.  The spinlock and the memory are released
+ * only when the kernel command succeeds; otherwise its error value is
+ * returned and the CQ stays usable.
+ */
+static int loopback_destroy_cq(struct ibv_cq *ibcq)
+{
+	struct loopback_cq *cq = ib_to_loopback_cq(ibcq);
+	int ret;
+
+	ret = ibv_cmd_destroy_cq(ibcq);
+	if (ret)
+		return ret;
+
+	pthread_spin_destroy(&cq->lock);
+	free(cq);
+	return 0;
+}
+
+/*
+ * Poll up to num_entries completions into wc via ibv_cmd_poll_cq(),
+ * holding cq->lock around the call.  A positive result is added to
+ * cq->rx_cqes.  Returns whatever ibv_cmd_poll_cq() returned.
+ */
+static int loopback_poll_cq(struct ibv_cq *ibcq, int num_entries, struct ibv_wc *wc)
+{
+	struct loopback_cq *cq = ib_to_loopback_cq(ibcq);
+	int ret;
+
+	pthread_spin_lock(&cq->lock);
+	ret = ibv_cmd_poll_cq(ibcq, num_entries, wc);
+	if (ret > 0)
+		cq->rx_cqes += ret;
+	pthread_spin_unlock(&cq->lock);
+	return ret;
+}
+
+/*
+ * Create a queue pair on the given PD.
+ *
+ * Returns the new QP with zeroed counters, or NULL on allocation or
+ * command failure.
+ */
+static struct ibv_qp *loopback_create_qp(struct ibv_pd *pd,
+					 struct ibv_qp_init_attr *attr)
+{
+	struct ib_uverbs_create_qp_resp resp;
+	struct ibv_create_qp cmd;
+	struct loopback_qp *qp;
+
+	qp = calloc(1, sizeof(*qp));
+	if (!qp)
+		return NULL;
+
+	if (ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd, sizeof(cmd),
+			      &resp, sizeof(resp))) {
+		free(qp);
+		return NULL;
+	}
+
+	return &qp->ibv_qp;
+}
+
+/* Query QP attributes; returns the result of ibv_cmd_query_qp(). */
+static int loopback_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+			     int attr_mask,
+			     struct ibv_qp_init_attr *init_attr)
+{
+	struct ibv_query_qp cmd;
+
+	return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, +
&cmd, sizeof(cmd));
+}
+
+/* Modify QP attributes; returns the result of ibv_cmd_modify_qp(). */
+static int loopback_modify_qp(struct ibv_qp *ibvqp,
+			      struct ibv_qp_attr *attr,
+			      int attr_mask)
+{
+	struct ibv_modify_qp cmd = {};
+
+	return ibv_cmd_modify_qp(ibvqp, attr, attr_mask, &cmd, sizeof(cmd));
+}
+
+/*
+ * Destroy a queue pair.  The memory is freed only when the kernel
+ * command succeeds; its return value is passed through either way.
+ */
+static int loopback_destroy_qp(struct ibv_qp *ibv_qp)
+{
+	struct loopback_qp *qp = to_rqp(ibv_qp);
+	int ret;
+
+	ret = ibv_cmd_destroy_qp(ibv_qp);
+	if (!ret)
+		free(qp);
+
+	return ret;
+}
+
+/*
+ * Post a chain of send work requests.
+ *
+ * ibv_cmd_post_send() submits the whole wr list in one command and
+ * reports the first rejected request through bad_wr, so it is called
+ * exactly once.  Calling it again for every wr->next would re-post the
+ * tail of the list each time.
+ *
+ * Returns EINVAL if bad_wr is NULL, 0 for an empty list or on success,
+ * otherwise the error from ibv_cmd_post_send() with *bad_wr pointing at
+ * the failing request.  wqes_posted counts the requests ahead of
+ * *bad_wr; a failed call bumps errs.wqe_post_errs once.
+ */
+static int loopback_post_send(struct ibv_qp *ibqp,
+			      struct ibv_send_wr *wr,
+			      struct ibv_send_wr **bad_wr)
+{
+	struct loopback_qp *qp = to_rqp(ibqp);
+	struct ibv_send_wr *cur;
+	int err;
+
+	if (!bad_wr)
+		return EINVAL;
+
+	*bad_wr = NULL;
+	if (!wr)
+		return 0;
+
+	err = ibv_cmd_post_send(ibqp, wr, bad_wr);
+	/* Guarantee a non-NULL bad_wr whenever an error is returned. */
+	if (err && !*bad_wr)
+		*bad_wr = wr;
+
+	/* On success *bad_wr is NULL, so the whole list is counted. */
+	for (cur = wr; cur && cur != *bad_wr; cur = cur->next)
+		qp->wqes_posted++;
+
+	if (err)
+		qp->errs.wqe_post_errs++;
+
+	return err;
+}
+
+/*
+ * Post a chain of receive work requests.
+ *
+ * As with the send side, ibv_cmd_post_recv() consumes the whole list in
+ * one command, so it is called exactly once.
+ *
+ * Returns EINVAL if bad_wr is NULL, 0 for an empty list or on success,
+ * otherwise the error from ibv_cmd_post_recv() with *bad_wr pointing at
+ * the failing request.  rqes_posted counts the requests ahead of
+ * *bad_wr; a failed call bumps errs.rqe_post_errs once.
+ */
+static int loopback_post_recv(struct ibv_qp *ibqp,
+			      struct ibv_recv_wr *wr,
+			      struct ibv_recv_wr **bad_wr)
+{
+	struct loopback_qp *qp = to_rqp(ibqp);
+	struct ibv_recv_wr *cur;
+	int err;
+
+	if (!bad_wr)
+		return EINVAL;
+
+	*bad_wr = NULL;
+	if (!wr)
+		return 0;
+
+	err = ibv_cmd_post_recv(ibqp, wr, bad_wr);
+	/* Guarantee a non-NULL bad_wr whenever an error is returned. */
+	if (err && !*bad_wr)
+		*bad_wr = wr;
+
+	/* On success *bad_wr is NULL, so the whole list is counted. */
+	for (cur = wr; cur && cur != *bad_wr; cur = cur->next)
+		qp->rqes_posted++;
+
+	if (err)
+		qp->errs.rqe_post_errs++;
+
+	return err;
+}
+
+/*
+ * Create an address handle on the given PD.
+ *
+ * Returns the new AH, or NULL on allocation or command failure.
+ */
+static struct ibv_ah *
+loopback_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
+{
+	struct ib_uverbs_create_ah_resp resp;
+	struct loopback_ah *ah;
+
+	ah = malloc(sizeof(*ah));
+	if (ah == NULL)
+		return NULL;
+
+	if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp, sizeof(resp))) {
+		free(ah);
+		return NULL;
+	}
+	return &ah->ibv_ah;
+}
+
+/*
+ * Destroy an address handle.  The memory is freed only when the kernel
+ * command succeeds; otherwise its error value is returned.
+ */
+static int loopback_destroy_ah(struct ibv_ah *ibah)
+{
+	struct loopback_ah *ah = to_rah(ibah);
+	int ret;
+
+	ret = ibv_cmd_destroy_ah(&ah->ibv_ah);
+	if (ret)
+		return ret;
+
+	free(ah);
+	return 0;
+}
+
+/* Verb entry points installed on every context by loopback_alloc_context(). */
+static const struct verbs_context_ops loopback_ctx_ops = {
+	.query_device = loopback_query_device,
+	.query_port = loopback_query_port,
+	.alloc_pd =
loopback_alloc_pd,
+	.dealloc_pd = loopback_dealloc_pd,
+	.reg_mr = loopback_reg_mr,
+	.dereg_mr = loopback_dereg_mr,
+	.create_cq = loopback_create_cq,
+	.poll_cq = loopback_poll_cq,
+	.req_notify_cq = ibv_cmd_req_notify_cq,
+	.destroy_cq = loopback_destroy_cq,
+	.create_qp = loopback_create_qp,
+	.query_qp = loopback_query_qp,
+	.modify_qp = loopback_modify_qp,
+	.destroy_qp = loopback_destroy_qp,
+	.post_send = loopback_post_send,
+	.post_recv = loopback_post_recv,
+	.create_ah = loopback_create_ah,
+	.destroy_ah = loopback_destroy_ah,
+};
+
+/*
+ * Allocate and initialise a provider context for ibdev on cmd_fd, issue
+ * the get-context command and install loopback_ctx_ops.
+ *
+ * Returns the embedded verbs_context, or NULL if the allocation or the
+ * command fails (the partially built context is torn down first).
+ */
+static struct verbs_context *
+loopback_alloc_context(struct ibv_device *ibdev, int cmd_fd, void *private_data)
+{
+	struct ib_uverbs_get_context_resp resp;
+	struct ibv_get_context cmd;
+	struct loopback_context *lctx;
+	int rc;
+
+	lctx = verbs_init_and_alloc_context(ibdev, cmd_fd, lctx, ibv_ctx,
+					    RDMA_DRIVER_LOOPBACK);
+	if (lctx == NULL)
+		return NULL;
+
+	rc = ibv_cmd_get_context(&lctx->ibv_ctx, &cmd, sizeof(cmd),
+				 &resp, sizeof(resp));
+	if (rc) {
+		/* Undo the init step before releasing the memory. */
+		verbs_uninit_context(&lctx->ibv_ctx);
+		free(lctx);
+		return NULL;
+	}
+
+	verbs_set_ops(&lctx->ibv_ctx, &loopback_ctx_ops);
+	return &lctx->ibv_ctx;
+}
+
+/* Tear down and free a context created by loopback_alloc_context(). */
+static void loopback_free_context(struct ibv_context *ibctx)
+{
+	struct loopback_context *lctx = to_rctx(ibctx);
+
+	verbs_uninit_context(&lctx->ibv_ctx);
+	free(lctx);
+}
+
+/* Free the loopback_device that embeds verbs_device. */
+static void loopback_uninit_device(struct verbs_device *verbs_device)
+{
+	free(to_rdev(&verbs_device->device));
+}
+
+/*
+ * Allocate a zeroed loopback_device and record the ABI version reported
+ * by sysfs.  Returns the embedded verbs_device, or NULL when out of
+ * memory.
+ */
+static struct verbs_device *loopback_device_alloc(struct verbs_sysfs_dev *sysfs_dev)
+{
+	struct loopback_device *ldev = calloc(1, sizeof(*ldev));
+
+	if (ldev == NULL)
+		return NULL;
+
+	ldev->abi_version = sysfs_dev->abi_ver;
+	return &ldev->ibv_dev;
+}
+
+static const struct verbs_device_ops loopback_dev_ops = {
+	.name = "loopback",
+	/*
+	 * For 64 bit machines ABI version 1 and 2 are the same.
Otherwise 32 + * bit machines require ABI version 2 which guarantees the user and + * kernel use the same ABI. + */ + .match_min_abi_version = sizeof(void *) == 8?1:2, + .match_max_abi_version = 2, + .match_table = hca_table, + .alloc_device = loopback_device_alloc, + .uninit_device = loopback_uninit_device, + .alloc_context = loopback_alloc_context, + .free_context = loopback_free_context, +}; +PROVIDER_DRIVER(loopback, loopback_dev_ops); diff --git a/providers/loopback/loopback.h b/providers/loopback/loopback.h new file mode 100644 index 0000000..29ebe57 --- /dev/null +++ b/providers/loopback/loopback.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2018-19 Mellanox Technologies Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef LOOPBACK_H
+#define LOOPBACK_H
+
+/* pthread_spinlock_t and uint64_t are used below; keep the header self-contained. */
+#include <pthread.h>
+#include <stdint.h>
+
+#include <infiniband/driver.h>
+#include <sys/socket.h>
+
+/* Provider device; abi_version is copied from sysfs in loopback_device_alloc(). */
+struct loopback_device {
+	struct verbs_device ibv_dev;
+	int abi_version;
+};
+
+/* Provider context; currently only wraps the generic verbs_context. */
+struct loopback_context {
+	struct verbs_context ibv_ctx;
+};
+
+/* Completion queue plus a polling counter. */
+struct loopback_cq {
+	struct ibv_cq ibv_cq;
+	pthread_spinlock_t lock;	/* held around ibv_cmd_poll_cq() */
+	uint64_t rx_cqes;		/* completions returned by poll_cq */
+};
+
+/* Address handle; currently only wraps the generic ibv_ah. */
+struct loopback_ah {
+	struct ibv_ah ibv_ah;
+};
+
+/* Per-QP counts of failed post_recv / post_send calls. */
+struct loopback_qp_err_counters {
+	uint64_t rqe_post_errs;
+	uint64_t wqe_post_errs;
+};
+
+/*
+ * Queue pair plus posting statistics.
+ * NOTE(review): wq_lock and rq_lock are neither initialised nor taken
+ * anywhere in loopback.c as submitted - confirm whether they are needed.
+ */
+struct loopback_qp {
+	struct ibv_qp ibv_qp;
+	pthread_spinlock_t wq_lock;
+	uint64_t wqes_posted;		/* send requests posted */
+
+	pthread_spinlock_t rq_lock;
+	uint64_t rqes_posted;		/* receive requests posted */
+
+	struct loopback_qp_err_counters errs;
+};
+
+#define qp_type(qp) ((qp)->ibv_qp.qp_type)
+
+/*
+ * Map an embedded ibv_<name> pointer back to its loopback_<type>
+ * container.  Expands to a reference to a variable literally named
+ * ib<name> (ibcq, ibqp, ibah), which must exist in the calling scope.
+ */
+#define to_loopback(name, type) \
+	container_of(ib##name, struct loopback_##type, ibv_##name)
+
+/* ibv_context -> enclosing loopback_context. */
+static inline struct loopback_context *to_rctx(struct ibv_context *ibctx)
+{
+	return container_of(ibctx, struct loopback_context, ibv_ctx.context);
+}
+
+/* ibv_device -> enclosing loopback_device. */
+static inline struct loopback_device *to_rdev(struct ibv_device *ibdev)
+{
+	return container_of(ibdev, struct loopback_device, ibv_dev.device);
+}
+
+/* ibv_cq -> enclosing loopback_cq. */
+static inline struct loopback_cq *ib_to_loopback_cq(struct ibv_cq *ibcq)
+{
+	return to_loopback(cq, cq);
+}
+
+/* ibv_qp -> enclosing loopback_qp. */
+static inline struct loopback_qp *to_rqp(struct ibv_qp *ibqp)
+{
+	return to_loopback(qp, qp);
+}
+
+/* ibv_ah -> enclosing loopback_ah. */
+static inline struct loopback_ah *to_rah(struct ibv_ah *ibah)
+{
+	return to_loopback(ah, ah);
+}
+
+#endif /* LOOPBACK_H */
--
1.8.3.1