On Wed, Apr 18, 2018 at 03:20:13AM -0700, muneendra.kumar@xxxxxxxxxxxx wrote: > From: Muneendra <muneendra.kumar@xxxxxxxxxxxx> > > This patch adds the following: > 1. librfc provider library for rdma-core, which acts as user > level interface for rdma_rfc kernel module. > 2. rfc_cfg utility, which helps in loading and configuring > the rdma_rfc Kernel module. > > This patch is inspired from librxe which provides the library for Soft RoCE kernel module. > > The Corresponding kernel module(rdma_rfc) changes has been sent for review and the details > are below. > > https://marc.info/?l=linux-rdma&m=152404459816049&w=2 > Signed-off-by: Muneendra <muneendra.kumar@xxxxxxxxxxxx> > --- > CMakeLists.txt | 2 + > kernel-headers/CMakeLists.txt | 2 + > kernel-headers/rdma/rdma_user_rfc.h | 179 +++++++ > providers/rfc/CMakeLists.txt | 8 + > providers/rfc/man/CMakeLists.txt | 4 + > providers/rfc/man/rfc.7 | 77 +++ > providers/rfc/man/rfc_cfg.8 | 70 +++ > providers/rfc/rfc-abi.h | 53 +++ > providers/rfc/rfc.c | 926 ++++++++++++++++++++++++++++++++++++ > providers/rfc/rfc.h | 129 +++++ > providers/rfc/rfc_cfg.in | 674 ++++++++++++++++++++++++++ > providers/rfc/rfc_queue.h | 128 +++++ > 12 files changed, 2252 insertions(+) > create mode 100644 kernel-headers/rdma/rdma_user_rfc.h > create mode 100644 providers/rfc/CMakeLists.txt > create mode 100644 providers/rfc/man/CMakeLists.txt > create mode 100644 providers/rfc/man/rfc.7 > create mode 100644 providers/rfc/man/rfc_cfg.8 > create mode 100644 providers/rfc/rfc-abi.h > create mode 100644 providers/rfc/rfc.c > create mode 100644 providers/rfc/rfc.h > create mode 100755 providers/rfc/rfc_cfg.in > create mode 100644 providers/rfc/rfc_queue.h > > diff --git a/CMakeLists.txt b/CMakeLists.txt > index 10a687c..0256bbd 100644 > --- a/CMakeLists.txt > +++ b/CMakeLists.txt > @@ -502,6 +502,8 @@ add_subdirectory(providers/hfi1verbs) > add_subdirectory(providers/ipathverbs) > add_subdirectory(providers/rxe) > add_subdirectory(providers/rxe/man) 
> +add_subdirectory(providers/rfc) > +add_subdirectory(providers/rfc/man) > > # Binaries > add_subdirectory(ibacm) # NO SPARSE > diff --git a/kernel-headers/CMakeLists.txt b/kernel-headers/CMakeLists.txt > index 3a526b9..5d280e4 100644 > --- a/kernel-headers/CMakeLists.txt > +++ b/kernel-headers/CMakeLists.txt > @@ -22,6 +22,7 @@ publish_internal_headers(rdma > rdma/rdma_user_ioctl.h > rdma/rdma_user_ioctl_cmds.h > rdma/rdma_user_rxe.h > + rdma/rdma_user_rfc.h > rdma/vmw_pvrdma-abi.h > ) > > @@ -69,6 +70,7 @@ rdma_kernel_provider_abi( > rdma/ocrdma-abi.h > rdma/qedr-abi.h > rdma/rdma_user_rxe.h > + rdma/rdma_user_rfc.h > rdma/vmw_pvrdma-abi.h > ) > > diff --git a/kernel-headers/rdma/rdma_user_rfc.h b/kernel-headers/rdma/rdma_user_rfc.h > new file mode 100644 > index 0000000..8c6b10d > --- /dev/null > +++ b/kernel-headers/rdma/rdma_user_rfc.h > @@ -0,0 +1,179 @@ > +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ > +/* > + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. Do you really want to leave it like this? > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. 
> + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > + */ > + > +#ifndef RDMA_USER_RXE_H > +#define RDMA_USER_RXE_H Suggesting to rename > + > +#include <linux/types.h> > +#include <linux/socket.h> > +#include <linux/in.h> > +#include <linux/in6.h> > + > +union rfc_gid { > + __u8 raw[16]; > + struct { > + __be64 subnet_prefix; > + __be64 interface_id; > + } global; > +}; For my understanding, why can't we use ibv_gid? > + > +struct rfc_global_route { > + union rfc_gid dgid; > + __u32 flow_label; > + __u8 sgid_index; > + __u8 hop_limit; > + __u8 traffic_class; > +}; > + > +struct rfc_av { > + __u8 port_num; > + __u8 network_type; > + __u16 reserved1; > + __u32 reserved2; > + struct rfc_global_route grh; > + union { > + struct sockaddr_in _sockaddr_in; > + struct sockaddr_in6 _sockaddr_in6; > + } sgid_addr, dgid_addr; > +}; > + > +struct rfc_send_wr { > + __aligned_u64 wr_id; > + __u32 num_sge; > + __u32 opcode; > + __u32 send_flags; > + union { > + __be32 imm_data; > + __u32 invalidate_rkey; > + } ex; > + union { > + struct { > + __aligned_u64 remote_addr; > + __u32 rkey; > + __u32 reserved; > + } rdma; > + struct { > + __aligned_u64 remote_addr; > + __aligned_u64 compare_add; > + __aligned_u64 swap; > + __u32 rkey; > + __u32 reserved; > + } atomic; > + struct { > + __u32 remote_qpn; > + __u32 remote_qkey; > + __u16 pkey_index; > + } ud; > + /* reg is only used by the kernel and is not part of the uapi */ > + struct { > + union { > + struct ib_mr *mr; > + __aligned_u64 reserved; > + }; > + __u32 key; > + __u32 
access; > + } reg; > + } wr; > +}; > + > +struct rfc_sge { > + __aligned_u64 addr; > + __u32 length; > + __u32 lkey; > +}; > + > +struct mminfo { > + __aligned_u64 offset; > + __u32 size; > + __u32 pad; > +}; > + > +struct rfc_dma_info { > + __u32 length; > + __u32 resid; > + __u32 cur_sge; > + __u32 num_sge; > + __u32 sge_offset; > + __u32 reserved; > + union { > + __u8 inline_data[0]; > + struct rfc_sge sge[0]; > + }; > +}; > + > +struct rfc_send_wqe { > + struct rfc_send_wr wr; > + struct rfc_av av; > + __u32 status; > + __u32 state; > + __aligned_u64 iova; > + __u32 mask; > + __u32 first_psn; > + __u32 last_psn; > + __u32 ack_length; > + __u32 ssn; > + __u32 has_rd_atomic; > + struct rfc_dma_info dma; > +}; > + > +struct rfc_recv_wqe { > + __aligned_u64 wr_id; > + __u32 num_sge; > + __u32 padding; > + struct rfc_dma_info dma; > +}; > + > +struct rfc_create_cq_resp { > + struct mminfo mi; > +}; > + > +struct rfc_resize_cq_resp { > + struct mminfo mi; > +}; > + > +struct rfc_create_qp_resp { > + struct mminfo rq_mi; > + struct mminfo sq_mi; > +}; > + > +struct rfc_create_srq_resp { > + struct mminfo mi; > + __u32 srq_num; > + __u32 reserved; > +}; > + > +struct rfc_modify_srq_cmd { > + __aligned_u64 mmap_info_addr; > +}; > + > +#endif /* RDMA_USER_RXE_H */ > diff --git a/providers/rfc/CMakeLists.txt b/providers/rfc/CMakeLists.txt > new file mode 100644 > index 0000000..3123311 > --- /dev/null > +++ b/providers/rfc/CMakeLists.txt > @@ -0,0 +1,8 @@ > +rdma_provider(rfc > + rfc.c > + ) > +rdma_subst_install(FILES "rfc_cfg.in" > + RENAME "rfc_cfg" > + DESTINATION "${CMAKE_INSTALL_BINDIR}" > + PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE > + ) > diff --git a/providers/rfc/man/CMakeLists.txt b/providers/rfc/man/CMakeLists.txt > new file mode 100644 > index 0000000..145855c > --- /dev/null > +++ b/providers/rfc/man/CMakeLists.txt > @@ -0,0 +1,4 @@ > +rdma_man_pages( > + rfc.7 > + rfc_cfg.8 > +) > diff --git 
a/providers/rfc/man/rfc.7 b/providers/rfc/man/rfc.7 > new file mode 100644 > index 0000000..594d6cd > --- /dev/null > +++ b/providers/rfc/man/rfc.7 > @@ -0,0 +1,77 @@ > +.\" -*- nroff -*- > +.\" > +.TH RFC 7 2011-06-29 1.0.0 > +.SH "NAME" > +rfc \- Software RDMA over FC > +.SH "SYNOPSIS" > +\fBmodprobe rdma_rfc\fR > +.br > +This is usually performed by a configuration utility (see \fBrfc_cfg\fR(8).) > + > +.SH "DESCRIPTION" > +The rdma_rfc kernel module provides a software implementation of RDMA over > +Fibre channel. It encapsulates RDMA payloads in FC-NVMe READ/WRITE requests > +and sends them over Fibre channel fabrics. > +The InfiniBand (IB) Base Transport Header (BTH) is encapsulated in the FC-NVMe > +header. > + > +Once a RFC instance has been created, communicating via RFC the same as > +communicating via any OFED compatible Infiniband HCA, albeit in some cases with > +addressing implications. > + > +Verbs applications written over IB verbs should work seamlessly except for the > +following constraints in current release- > +1. Partitioning is not supported. RFC module ignores any partition key in BTH. > +2. Inline and Immediate data size >= 64KB is not supported. > +3. only Reliable connection(RC) and Unreliable datagram(UD) type queue pairs > + are supported. > + > +.SH "FILES" > +.TP > +\fB/sys/class/infiniband/rfc[0,1,...]\fR > +Directory that holds RDMA device information. The format is the same as other RDMA devices. > + > +.TP > +\fB/sys/module/rdma_rfc_net/parameters/add\fR > +Write only file used by \fBrfc_cfg(8)\fR to add new RFC devices to existing Ethernet devices. > + > +.TP > +\fB/sys/module/rdma_rfc_net/parameters/remove\fR > +Write only file used by \fBrfc_cfg(8)\fR to remove RFC devices. > + > +.TP > +\fB/sys/module/rdma_rfc/parameters/max_qp\fR > +Read/Write file that sets a limit on the number of QPs allowed per RFC device. 
> + > +.TP > +\fB/sys/module/rdma_rfc/parameters/max_qp_wr\fR > +Read/Write file that sets a limit on the number of WRs per QP allowed per RFC device. > + > +.TP > +\fB/sys/module/rdma_rfc/parameters/max_mr\fR > +Read/Write file that sets a limit on the number of MRs allowed per RFC device. > + > +.TP > +\fB/sys/module/rdma_rfc/parameters/max_fmr\fR > +Read/Write file that sets a limit on the number of FMRs allowed per RFC device. > + > +.TP > +\fB/sys/module/rdma_rfc/parameters/max_cq\fR > +Read/Write file that sets a limit on the number of CQs allowed per RFC device. > + > +.TP > +\fB/sys/module/rdma_rfc/parameters/max_log_cqe\fR > +Read/Write file that sets a limit on the log base 2 of the number of CQEs per CQ allowed per RFC device. > + > +.TP > +\fB/sys/module/rdma_rfc/parameters/max_inline_data\fR > +Read/Write file that sets a limit on the maximum amount of inline data per WR allowed per RFC device. > + > +The above configuration parameters only affect a new RFC instance when it is created not afterwards. > + > +.SH "SEE ALSO" > +.BR rfc_cfg (8), > +.BR verbs (7), > + > +.SH "AUTHORS" > +Written by Muneendra Kumar, Anand Sundaram, Amit Tyagi at Broadcom INC. > diff --git a/providers/rfc/man/rfc_cfg.8 b/providers/rfc/man/rfc_cfg.8 > new file mode 100644 > index 0000000..8c12bbf > --- /dev/null > +++ b/providers/rfc/man/rfc_cfg.8 > @@ -0,0 +1,70 @@ > +.\" -*- nroff -*- > +.\" > +.TH RFC_CFG 8 2011-06-29 1.0.0 > +.SH "NAME" > +rfc_cfg \- rfc configuration tool for RFC (Soft RFC) > +.SH "SYNOPSIS" > +\fBrfc_cfg [status]\fR > +.br > +\fBrfc_cfg start\fR [\fB\-p\fR \fIproto\fR] > +.br > +\fBrfc_cfg stop\fR > +.br > +\fBrfc_cfg persistent\fR > +.br > +\fBrfc_cfg add\fR [\fB\-n\fR] \fIethN\fR > +.br > +\fBrfc_cfg remove\fR [\fB\-n\fR] \fIethN\fR|\fIrfcN\fR > +.br > +.SH "DESCRIPTION" > +rfc_cfg is the configuration tool for the RFC software implementation of the RFC protocol. 
> + > +The RFC kernel modules are loaded, configured, reconfigured and unloaded via the various rfc_cfg command options, documented below. > + > +.SH "PARAMETERS" > +.TP > +\fIethN\fR > +Network device name as listed in /sys/class/net. Only RFC Ethernet devices are supported; ie. rfcnet0. > + > +.TP > +\fIrfcN\fR > +RFC device name as listed in /sys/class/infiniband/. Examples are rfc0 or rfc1. > + > +.SH "COMMANDS" > +.TP > +[\fBstatus\fR] > +The \fBstatus\fR command prints a table of information on available Ethernet devices and configured RFC instances. The status display is the default if no options are provided. > + > +.TP > +\fBstart\fR [\fB\-p\fR \fIproto\fR] > +The \fBstart\fR command loads the RFC modules and configures any persistent instances. > + > +.TP > +\fBstop\fR > +The \fBstop\fR command unconfigures all RFC instances and attempts to unload the kernel modules. > + > +.TP > +\fBpersistent\fR > +The \fBpersistent\fR command prints the list of Ethernet devices for which a RFC instance is persistently configured. > + > +.TP > +\fBadd\fR [\fB\-n\fR] \fIethN\fR > +The \fBadd\fR command will only configure a RFC instance on RFC Ethernet device \fIrfcnetN\fR (e.g. rfcnet0). The RFC modules must have already been loaded via \fBrfc_cfg start\fR. > + > +The default behavior is to add \fIrfcnetN\fR to a file of persistent configurations and the same RFC device will be configured the next time that \fBrfc_cfg start\fR is run. If the \fB-n\fR option is included the device is not added to the persistence file. > + > +.TP > +\fBremove\fR [\fB\-n\fR] \fIethN\fR|\fIrfcN\fR > +The \fBremove\fR command will remove the specified RFC instance. The parameter must match a currently active rfcnetN or rfcN name. > + > +If the \fB-n\fR option is included the RFC device will be removed but not removed from the persistent state. So it will be recreated the next time that \fBrfc_cfg start\fR is run. 
> + > +.SH "FILES" > +.TP > +\fB[PREFIX]/etc/rfc.conf\fR > +RFC configuration file. Contains the list of persistent RFC instances. All persistent RFC instances can be removed by deleting this file (note this will take effect on the next "rfc_cfg start" -- to remove actively configured instances, you must "rfc_cfg stop"). > + > +.SH "SEE ALSO" > +.BR rfc (7), > +.SH "AUTHORS" > +Written by Muneendra Kumar, Anand Sundaram, Amit Tyagi at Broadcom INC. > diff --git a/providers/rfc/rfc-abi.h b/providers/rfc/rfc-abi.h > new file mode 100644 > index 0000000..a36a9ef > --- /dev/null > +++ b/providers/rfc/rfc-abi.h > @@ -0,0 +1,53 @@ > +/* > + * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved. > + * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved. > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > + * > + */ > + > +#ifndef RXE_ABI_H > +#define RXE_ABI_H > + > +#include <infiniband/kern-abi.h> > +#include <rdma/rdma_user_rfc.h> > +#include <kernel-abi/rdma_user_rfc.h> > + > +DECLARE_DRV_CMD(urfc_create_cq, IB_USER_VERBS_CMD_CREATE_CQ, > + empty, rfc_create_cq_resp); > +DECLARE_DRV_CMD(urfc_create_qp, IB_USER_VERBS_CMD_CREATE_QP, > + empty, rfc_create_qp_resp); > +DECLARE_DRV_CMD(urfc_create_srq, IB_USER_VERBS_CMD_CREATE_SRQ, > + empty, rfc_create_srq_resp); > +DECLARE_DRV_CMD(urfc_modify_srq, IB_USER_VERBS_CMD_MODIFY_SRQ, > + rfc_modify_srq_cmd, empty); > +DECLARE_DRV_CMD(urfc_resize_cq, IB_USER_VERBS_CMD_RESIZE_CQ, > + empty, rfc_resize_cq_resp); > + > +#endif /* RXE_ABI_H */ > diff --git a/providers/rfc/rfc.c b/providers/rfc/rfc.c > new file mode 100644 > index 0000000..0611bc1 > --- /dev/null > +++ b/providers/rfc/rfc.c > @@ -0,0 +1,926 @@ > +/* > + * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved. > + * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved. > + * Copyright (C) 2006-2007 QLogic Corporation, All rights reserved. > + * Copyright (c) 2005. PathScale, Inc. All rights reserved. > + * > + * This software is available to you under a choice of one of two > + * licenses. 
You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. 
> + */ > + > +#include <config.h> > + > +#include <endian.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <unistd.h> > +#include <string.h> > +#include <pthread.h> > +#include <netinet/in.h> > +#include <sys/mman.h> > +#include <errno.h> > + > +#include <endian.h> > +#include <pthread.h> > +#include <stddef.h> > + > +#include <infiniband/driver.h> > +#include <infiniband/verbs.h> > + > +#include "rfc_queue.h" > +#include "rfc-abi.h" > +#include "rfc.h" > + > +static const struct verbs_match_ent hca_table[] = { > + /* FIXME: rfc needs a more reliable way to detect the rfc device */ > + VERBS_NAME_MATCH("rfc", NULL), > + {}, > +}; > + > +static int rfc_query_device(struct ibv_context *context, > + struct ibv_device_attr *attr) > +{ > + struct ibv_query_device cmd; > + uint64_t raw_fw_ver; > + unsigned major, minor, sub_minor; > + int ret; > + > + ret = ibv_cmd_query_device(context, attr, &raw_fw_ver, > + &cmd, sizeof cmd); > + if (ret) > + return ret; > + > + major = (raw_fw_ver >> 32) & 0xffff; > + minor = (raw_fw_ver >> 16) & 0xffff; > + sub_minor = raw_fw_ver & 0xffff; > + > + snprintf(attr->fw_ver, sizeof attr->fw_ver, > + "%d.%d.%d", major, minor, sub_minor); > + > + return 0; > +} > + > +static int rfc_query_port(struct ibv_context *context, uint8_t port, > + struct ibv_port_attr *attr) > +{ > + struct ibv_query_port cmd; > + > + return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd); > +} > + > +static struct ibv_pd *rfc_alloc_pd(struct ibv_context *context) > +{ > + struct ibv_alloc_pd cmd; > + struct ib_uverbs_alloc_pd_resp resp; > + struct ibv_pd *pd; > + > + pd = malloc(sizeof *pd); > + if (!pd) > + return NULL; > + > + if (ibv_cmd_alloc_pd(context, pd, &cmd, sizeof cmd, &resp, sizeof resp)) { > + free(pd); > + return NULL; > + } > + > + return pd; > +} > + > +static int rfc_dealloc_pd(struct ibv_pd *pd) > +{ > + int ret; > + > + ret = ibv_cmd_dealloc_pd(pd); > + if (!ret) > + free(pd); > + > + return ret; > +} > + > +static struct 
ibv_mr *rfc_reg_mr(struct ibv_pd *pd, void *addr, size_t length, > + int access) > +{ > + struct ibv_mr *mr; > + struct ibv_reg_mr cmd; > + struct ib_uverbs_reg_mr_resp resp; > + int ret; > + > + mr = malloc(sizeof *mr); > + if (!mr) { > + return NULL; > + } AFAIK curly braces are not needed here. > + > + ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, mr, > + &cmd, sizeof cmd, &resp, sizeof resp); > + if (ret) { > + free(mr); > + return NULL; > + } > + > + return mr; > +} > + > +static int rfc_dereg_mr(struct ibv_mr *mr) > +{ > + int ret; > + > + ret = ibv_cmd_dereg_mr(mr); > + if (ret) > + return ret; > + > + free(mr); > + return 0; > +} > + > +static struct ibv_cq *rfc_create_cq(struct ibv_context *context, int cqe, > + struct ibv_comp_channel *channel, > + int comp_vector) > +{ > + struct rfc_cq *cq; > + struct urfc_create_cq_resp resp; > + int ret; > + > + cq = malloc(sizeof *cq); > + if (!cq) { > + return NULL; > + } > + > + ret = ibv_cmd_create_cq(context, cqe, channel, comp_vector, > + &cq->ibv_cq, NULL, 0, > + &resp.ibv_resp, sizeof resp); > + if (ret) { > + free(cq); > + return NULL; > + } > + > + cq->queue = mmap(NULL, resp.mi.size, PROT_READ | PROT_WRITE, MAP_SHARED, > + context->cmd_fd, resp.mi.offset); > + if ((void *)cq->queue == MAP_FAILED) { > + ibv_cmd_destroy_cq(&cq->ibv_cq); > + free(cq); > + return NULL; > + } > + > + cq->mmap_info = resp.mi; > + pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE); > + > + return &cq->ibv_cq; > +} > + > +static int rfc_resize_cq(struct ibv_cq *ibcq, int cqe) > +{ > + struct rfc_cq *cq = to_rcq(ibcq); > + struct ibv_resize_cq cmd; > + struct urfc_resize_cq_resp resp; > + int ret; > + > + pthread_spin_lock(&cq->lock); > + > + ret = ibv_cmd_resize_cq(ibcq, cqe, &cmd, sizeof cmd, > + &resp.ibv_resp, sizeof resp); > + if (ret) { > + pthread_spin_unlock(&cq->lock); > + return ret; > + } > + > + munmap(cq->queue, cq->mmap_info.size); > + > + cq->queue = mmap(NULL, resp.mi.size, > + PROT_READ | 
PROT_WRITE, MAP_SHARED, > + ibcq->context->cmd_fd, resp.mi.offset); > + > + ret = errno; > + pthread_spin_unlock(&cq->lock); > + > + if ((void *)cq->queue == MAP_FAILED) { > + cq->queue = NULL; > + cq->mmap_info.size = 0; > + return ret; > + } > + > + cq->mmap_info = resp.mi; > + > + return 0; > +} > + > +static int rfc_destroy_cq(struct ibv_cq *ibcq) > +{ > + struct rfc_cq *cq = to_rcq(ibcq); > + int ret; > + > + ret = ibv_cmd_destroy_cq(ibcq); > + if (ret) > + return ret; > + > + if (cq->mmap_info.size) > + munmap(cq->queue, cq->mmap_info.size); > + free(cq); > + > + return 0; > +} > + > +static int rfc_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) > +{ > + struct rfc_cq *cq = to_rcq(ibcq); > + struct rfc_queue *q; > + int npolled; > + uint8_t *src; > + > + pthread_spin_lock(&cq->lock); > + q = cq->queue; > + > + for (npolled = 0; npolled < ne; ++npolled, ++wc) { > + if (queue_empty(q)) > + break; > + > + atomic_thread_fence(memory_order_acquire); > + src = consumer_addr(q); > + memcpy(wc, src, sizeof(*wc)); > + advance_consumer(q); > + } > + > + pthread_spin_unlock(&cq->lock); > + return npolled; > +} > + > +static struct ibv_srq *rfc_create_srq(struct ibv_pd *pd, > + struct ibv_srq_init_attr *attr) > +{ > + struct rfc_srq *srq; > + struct ibv_create_srq cmd; > + struct urfc_create_srq_resp resp; > + int ret; > + > + srq = malloc(sizeof *srq); > + if (srq == NULL) { > + return NULL; > + } AFAIK curly braces are not needed here. 
> + > + ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr, &cmd, sizeof cmd, > + &resp.ibv_resp, sizeof resp); > + if (ret) { > + free(srq); > + return NULL; > + } > + > + srq->rq.queue = mmap(NULL, resp.mi.size, > + PROT_READ | PROT_WRITE, MAP_SHARED, > + pd->context->cmd_fd, resp.mi.offset); > + if ((void *)srq->rq.queue == MAP_FAILED) { > + ibv_cmd_destroy_srq(&srq->ibv_srq); > + free(srq); > + return NULL; > + } Usually, when a function has more than one exit flow and each one repeats the cleanup of its predecessor, I see that goto with labels is used. > + > + srq->mmap_info = resp.mi; > + srq->rq.max_sge = attr->attr.max_sge; > + pthread_spin_init(&srq->rq.lock, PTHREAD_PROCESS_PRIVATE); > + > + return &srq->ibv_srq; > +} > + > +static int rfc_modify_srq(struct ibv_srq *ibsrq, > + struct ibv_srq_attr *attr, int attr_mask) > +{ > + struct rfc_srq *srq = to_rsrq(ibsrq); > + struct urfc_modify_srq cmd; > + int rc = 0; > + struct mminfo mi; > + > + mi.offset = 0; > + mi.size = 0; > + > + if (attr_mask & IBV_SRQ_MAX_WR) > + pthread_spin_lock(&srq->rq.lock); > + > + cmd.mmap_info_addr = (__u64)(uintptr_t) & mi; > + rc = ibv_cmd_modify_srq(ibsrq, attr, attr_mask, > + &cmd.ibv_cmd, sizeof cmd); > + if (rc) > + goto out; > + > + if (attr_mask & IBV_SRQ_MAX_WR) { > + (void)munmap(srq->rq.queue, srq->mmap_info.size); > + srq->rq.queue = mmap(NULL, mi.size, > + PROT_READ | PROT_WRITE, MAP_SHARED, > + ibsrq->context->cmd_fd, mi.offset); > + > + if ((void *)srq->rq.queue == MAP_FAILED) { > + rc = errno; > + srq->rq.queue = NULL; > + srq->mmap_info.size = 0; > + goto out; > + } > + > + srq->mmap_info = mi; > + } > + > +out: > + if (attr_mask & IBV_SRQ_MAX_WR) > + pthread_spin_unlock(&srq->rq.lock); > + return rc; > +} > + > +static int rfc_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr) > +{ > + struct ibv_query_srq cmd; > + > + return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd); > +} > + > +static int rfc_destroy_srq(struct ibv_srq *ibvsrq) > 
+{ > + int ret; > + struct rfc_srq *srq = to_rsrq(ibvsrq); > + struct rfc_queue *q = srq->rq.queue; > + > + ret = ibv_cmd_destroy_srq(ibvsrq); > + if (!ret) { > + if (srq->mmap_info.size) > + munmap(q, srq->mmap_info.size); > + free(srq); > + } > + > + return ret; > +} > + > +static int rfc_post_one_recv(struct rfc_wq *rq, struct ibv_recv_wr *recv_wr) > +{ > + int i; > + struct rfc_recv_wqe *wqe; > + struct rfc_queue *q = rq->queue; > + int length = 0; > + int rc = 0; > + > + if (queue_full(q)) { > + rc = -ENOMEM; > + goto out; > + } > + > + if (recv_wr->num_sge > rq->max_sge) { > + rc = -EINVAL; > + goto out; > + } > + > + wqe = (struct rfc_recv_wqe *)producer_addr(q); > + > + wqe->wr_id = recv_wr->wr_id; > + wqe->num_sge = recv_wr->num_sge; > + > + memcpy(wqe->dma.sge, recv_wr->sg_list, > + wqe->num_sge*sizeof(*wqe->dma.sge)); > + > + for (i = 0; i < wqe->num_sge; i++) { > + length += wqe->dma.sge[i].length; > + } > + > + wqe->dma.length = length; > + wqe->dma.resid = length; > + wqe->dma.cur_sge = 0; > + wqe->dma.num_sge = wqe->num_sge; > + wqe->dma.sge_offset = 0; > + > + advance_producer(q); > + > +out: > + return rc; > +} > + > +static int rfc_post_srq_recv(struct ibv_srq *ibvsrq, > + struct ibv_recv_wr *recv_wr, > + struct ibv_recv_wr **bad_recv_wr) > +{ > + struct rfc_srq *srq = to_rsrq(ibvsrq); > + int rc = 0; > + > + pthread_spin_lock(&srq->rq.lock); > + > + while (recv_wr) { > + rc = rfc_post_one_recv(&srq->rq, recv_wr); > + if (rc) { > + *bad_recv_wr = recv_wr; > + break; > + } > + > + recv_wr = recv_wr->next; > + } > + > + pthread_spin_unlock(&srq->rq.lock); > + > + return rc; > +} > + > +static struct ibv_qp *rfc_create_qp(struct ibv_pd *pd, > + struct ibv_qp_init_attr *attr) > +{ > + struct ibv_create_qp cmd; > + struct urfc_create_qp_resp resp; > + struct rfc_qp *qp; > + int ret; > + > + qp = malloc(sizeof *qp); > + if (!qp) { > + return NULL; > + } Curly braces. 
> + > + ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd, sizeof cmd, > + &resp.ibv_resp, sizeof resp); > + if (ret) { > + free(qp); > + return NULL; > + } > + > + if (attr->srq) { > + qp->rq.max_sge = 0; > + qp->rq.queue = NULL; > + qp->rq_mmap_info.size = 0; > + } else { > + qp->rq.max_sge = attr->cap.max_recv_sge; > + qp->rq.queue = mmap(NULL, resp.rq_mi.size, PROT_READ | PROT_WRITE, > + MAP_SHARED, > + pd->context->cmd_fd, resp.rq_mi.offset); > + if ((void *)qp->rq.queue == MAP_FAILED) { > + ibv_cmd_destroy_qp(&qp->ibv_qp); > + free(qp); > + return NULL; > + } > + > + qp->rq_mmap_info = resp.rq_mi; > + pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE); > + } > + > + qp->sq.max_sge = attr->cap.max_send_sge; > + qp->sq.max_inline = attr->cap.max_inline_data; > + qp->sq.queue = mmap(NULL, resp.sq_mi.size, PROT_READ | PROT_WRITE, > + MAP_SHARED, > + pd->context->cmd_fd, resp.sq_mi.offset); > + if ((void *)qp->sq.queue == MAP_FAILED) { > + if (qp->rq_mmap_info.size) > + munmap(qp->rq.queue, qp->rq_mmap_info.size); > + ibv_cmd_destroy_qp(&qp->ibv_qp); > + free(qp); Suggesting to use the goto-label pattern. 
> + return NULL; > + } > + > + qp->sq_mmap_info = resp.sq_mi; > + pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE); > + > + return &qp->ibv_qp; > +} > + > +static int rfc_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, > + int attr_mask, > + struct ibv_qp_init_attr *init_attr) > +{ > + struct ibv_query_qp cmd; > + > + return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, > + &cmd, sizeof cmd); > +} > + > +static int rfc_modify_qp(struct ibv_qp *ibvqp, > + struct ibv_qp_attr *attr, > + int attr_mask) > +{ > + struct ibv_modify_qp cmd = {}; > + > + return ibv_cmd_modify_qp(ibvqp, attr, attr_mask, &cmd, sizeof cmd); > +} > + > +static int rfc_destroy_qp(struct ibv_qp *ibv_qp) > +{ > + int ret; > + struct rfc_qp *qp = to_rqp(ibv_qp); > + > + ret = ibv_cmd_destroy_qp(ibv_qp); > + if (!ret) { > + if (qp->rq_mmap_info.size) > + munmap(qp->rq.queue, qp->rq_mmap_info.size); > + if (qp->sq_mmap_info.size) > + munmap(qp->sq.queue, qp->sq_mmap_info.size); > + > + free(qp); > + } > + > + return ret; > +} > + > +/* basic sanity checks for send work request */ > +static int validate_send_wr(struct rfc_wq *sq, struct ibv_send_wr *ibwr, > + unsigned int length) > +{ > + enum ibv_wr_opcode opcode = ibwr->opcode; > + > + if (ibwr->num_sge > sq->max_sge) > + return -EINVAL; > + > + if ((opcode == IBV_WR_ATOMIC_CMP_AND_SWP) > + || (opcode == IBV_WR_ATOMIC_FETCH_AND_ADD)) > + if (length < 8 || ibwr->wr.atomic.remote_addr & 0x7) > + return -EINVAL; > + > + if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline)) > + return -EINVAL; > + > + return 0; > +} > + > +static void convert_send_wr(struct rfc_send_wr *kwr, struct ibv_send_wr *uwr) > +{ > + memset(kwr, 0, sizeof(*kwr)); > + > + kwr->wr_id = uwr->wr_id; > + kwr->num_sge = uwr->num_sge; > + kwr->opcode = uwr->opcode; > + kwr->send_flags = uwr->send_flags; > + kwr->ex.imm_data = uwr->imm_data; > + > + switch(uwr->opcode) { > + case IBV_WR_RDMA_WRITE: > + case IBV_WR_RDMA_WRITE_WITH_IMM: > + case 
IBV_WR_RDMA_READ: > + kwr->wr.rdma.remote_addr = uwr->wr.rdma.remote_addr; > + kwr->wr.rdma.rkey = uwr->wr.rdma.rkey; > + break; > + > + case IBV_WR_SEND: > + case IBV_WR_SEND_WITH_IMM: > + kwr->wr.ud.remote_qpn = uwr->wr.ud.remote_qpn; > + kwr->wr.ud.remote_qkey = uwr->wr.ud.remote_qkey; > + break; > + > + case IBV_WR_ATOMIC_CMP_AND_SWP: > + case IBV_WR_ATOMIC_FETCH_AND_ADD: > + kwr->wr.atomic.remote_addr = uwr->wr.atomic.remote_addr; > + kwr->wr.atomic.compare_add = uwr->wr.atomic.compare_add; > + kwr->wr.atomic.swap = uwr->wr.atomic.swap; > + kwr->wr.atomic.rkey = uwr->wr.atomic.rkey; > + break; > + > + case IBV_WR_LOCAL_INV: > + case IBV_WR_BIND_MW: > + case IBV_WR_SEND_WITH_INV: > + case IBV_WR_TSO: > + break; > + } > +} > + > +static int init_send_wqe(struct rfc_qp *qp, struct rfc_wq *sq, > + struct ibv_send_wr *ibwr, unsigned int length, > + struct rfc_send_wqe *wqe) > +{ > + int num_sge = ibwr->num_sge; > + int i; > + unsigned int opcode = ibwr->opcode; > + > + convert_send_wr(&wqe->wr, ibwr); > + > + if (qp_type(qp) == IBV_QPT_UD) > + memcpy(&wqe->av, &to_rah(ibwr->wr.ud.ah)->av, > + sizeof(struct rfc_av)); > + > + if (ibwr->send_flags & IBV_SEND_INLINE) { > + uint8_t *inline_data = wqe->dma.inline_data; > + > + for (i = 0; i < num_sge; i++) { > + memcpy(inline_data, > + (uint8_t *)(long)ibwr->sg_list[i].addr, > + ibwr->sg_list[i].length); > + inline_data += ibwr->sg_list[i].length; > + } > + } else > + memcpy(wqe->dma.sge, ibwr->sg_list, > + num_sge*sizeof(struct ibv_sge)); > + > + if ((opcode == IBV_WR_ATOMIC_CMP_AND_SWP) > + || (opcode == IBV_WR_ATOMIC_FETCH_AND_ADD)) > + wqe->iova = ibwr->wr.atomic.remote_addr; > + else > + wqe->iova = ibwr->wr.rdma.remote_addr; > + wqe->dma.length = length; > + wqe->dma.resid = length; > + wqe->dma.num_sge = num_sge; > + wqe->dma.cur_sge = 0; > + wqe->dma.sge_offset = 0; > + wqe->state = 0; > + wqe->ssn = qp->ssn++; > + > + return 0; Please make this function return void. 
> +} > + > +static int post_one_send(struct rfc_qp *qp, struct rfc_wq *sq, > + struct ibv_send_wr *ibwr) > +{ > + int err; > + struct rfc_send_wqe *wqe; > + unsigned int length = 0; > + int i; > + > + for (i = 0; i < ibwr->num_sge; i++) > + length += ibwr->sg_list[i].length; > + > + err = validate_send_wr(sq, ibwr, length); > + if (err) { > + printf("validate send failed\n"); > + return err; > + } > + > + wqe = (struct rfc_send_wqe *)producer_addr(sq->queue); > + > + err = init_send_wqe(qp, sq, ibwr, length, wqe); > + if (err) > + return err; > + > + if (queue_full(sq->queue)) > + return -ENOMEM; > + > + advance_producer(sq->queue); > + > + return 0; > +} > + > +/* send a null post send as a doorbell */ > +static int post_send_db(struct ibv_qp *ibqp) > +{ > + struct ibv_post_send cmd; > + struct ib_uverbs_post_send_resp resp; > + > + cmd.hdr.command = IB_USER_VERBS_CMD_POST_SEND; > + cmd.hdr.in_words = sizeof(cmd) / 4; > + cmd.hdr.out_words = sizeof(resp) / 4; > + cmd.response = (uintptr_t)&resp; > + cmd.qp_handle = ibqp->handle; > + cmd.wr_count = 0; > + cmd.sge_count = 0; > + cmd.wqe_size = sizeof(struct ibv_send_wr); > + > + if (write(ibqp->context->cmd_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) > + return errno; > + > + return 0; > +} > + > +/* this API does not make a distinction between > + restartable and non-restartable errors */ > +static int rfc_post_send(struct ibv_qp *ibqp, > + struct ibv_send_wr *wr_list, > + struct ibv_send_wr **bad_wr) > +{ > + int rc = 0; > + int err; > + struct rfc_qp *qp = to_rqp(ibqp); > + struct rfc_wq *sq = &qp->sq; > + > + if (!bad_wr) > + return EINVAL; > + > + *bad_wr = NULL; > + > + if (!sq || !wr_list || !sq->queue) > + return EINVAL; > + > + pthread_spin_lock(&sq->lock); > + > + while (wr_list) { > + rc = post_one_send(qp, sq, wr_list); > + if (rc) { > + *bad_wr = wr_list; > + break; > + } > + > + wr_list = wr_list->next; > + } > + > + pthread_spin_unlock(&sq->lock); > + > + err = post_send_db(ibqp); Extra space. 
> + return err ? err : rc; > +} > + > +static int rfc_post_recv(struct ibv_qp *ibqp, > + struct ibv_recv_wr *recv_wr, > + struct ibv_recv_wr **bad_wr) > +{ > + int rc = 0; > + struct rfc_qp *qp = to_rqp(ibqp); > + struct rfc_wq *rq = &qp->rq; > + > + if (!bad_wr) > + return EINVAL; > + > + *bad_wr = NULL; > + > + if (!rq || !recv_wr || !rq->queue) > + return EINVAL; > + > + pthread_spin_lock(&rq->lock); > + > + while (recv_wr) { > + rc = rfc_post_one_recv(rq, recv_wr); > + if (rc) { > + *bad_wr = recv_wr; > + break; > + } > + > + recv_wr = recv_wr->next; > + } > + > + pthread_spin_unlock(&rq->lock); > + > + return rc; > +} > + > +static inline int ipv6_addr_v4mapped(const struct in6_addr *a) > +{ > + return IN6_IS_ADDR_V4MAPPED(a); > +} > + > +typedef typeof(((struct rfc_av *)0)->sgid_addr) sockaddr_union_t; > + > +static inline int rdma_gid2ip(sockaddr_union_t *out, union ibv_gid *gid) > +{ > + if (ipv6_addr_v4mapped((struct in6_addr *)gid)) { > + memset(&out->_sockaddr_in, 0, sizeof(out->_sockaddr_in)); > + memcpy(&out->_sockaddr_in.sin_addr.s_addr, gid->raw + 12, 4); > + } else { > + memset(&out->_sockaddr_in6, 0, sizeof(out->_sockaddr_in6)); > + out->_sockaddr_in6.sin6_family = AF_INET6; > + memcpy(&out->_sockaddr_in6.sin6_addr.s6_addr, gid->raw, 16); > + } > + return 0; Please make this function return void. > +} > + > +static struct ibv_ah *rfc_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) > +{ > + int err; > + struct rfc_ah *ah; > + struct rfc_av *av; > + union ibv_gid sgid; > + struct ib_uverbs_create_ah_resp resp; > + > + err = ibv_query_gid(pd->context, attr->port_num, attr->grh.sgid_index, > + &sgid); > + if (err) { > + fprintf(stderr, "rfc: Failed to query sgid.\n"); I'm not sure we want to print from a library (this is a library, isn't it?). Also, what is so special about this error that it is printed when the others are not?
> + return NULL; > + } > + > + ah = malloc(sizeof *ah); > + if (ah == NULL) > + return NULL; > + > + av = &ah->av; > + av->port_num = attr->port_num; > + memcpy(&av->grh, &attr->grh, sizeof(attr->grh)); > + av->network_type = > + ipv6_addr_v4mapped((struct in6_addr *)attr->grh.dgid.raw) ? > + RDMA_NETWORK_IPV4 : RDMA_NETWORK_IPV6; > + > + rdma_gid2ip(&av->sgid_addr, &sgid); > + rdma_gid2ip(&av->dgid_addr, &attr->grh.dgid); > + > + memset(&resp, 0, sizeof(resp)); > + if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp, sizeof(resp))) { > + free(ah); > + return NULL; > + } > + > + return &ah->ibv_ah; > +} > + > +static int rfc_destroy_ah(struct ibv_ah *ibah) > +{ > + int ret; > + struct rfc_ah *ah = to_rah(ibah); > + > + ret = ibv_cmd_destroy_ah(&ah->ibv_ah); > + if (ret) > + return ret; > + > + free(ah); > + return 0; > +} > + > +static const struct verbs_context_ops rfc_ctx_ops = { > + .query_device = rfc_query_device, > + .query_port = rfc_query_port, > + .alloc_pd = rfc_alloc_pd, > + .dealloc_pd = rfc_dealloc_pd, > + .reg_mr = rfc_reg_mr, > + .dereg_mr = rfc_dereg_mr, > + .create_cq = rfc_create_cq, > + .poll_cq = rfc_poll_cq, > + .req_notify_cq = ibv_cmd_req_notify_cq, > + .resize_cq = rfc_resize_cq, > + .destroy_cq = rfc_destroy_cq, > + .create_srq = rfc_create_srq, > + .modify_srq = rfc_modify_srq, > + .query_srq = rfc_query_srq, > + .destroy_srq = rfc_destroy_srq, > + .post_srq_recv = rfc_post_srq_recv, > + .create_qp = rfc_create_qp, > + .query_qp = rfc_query_qp, > + .modify_qp = rfc_modify_qp, > + .destroy_qp = rfc_destroy_qp, > + .post_send = rfc_post_send, > + .post_recv = rfc_post_recv, > + .create_ah = rfc_create_ah, > + .destroy_ah = rfc_destroy_ah, > + .attach_mcast = ibv_cmd_attach_mcast, > + .detach_mcast = ibv_cmd_detach_mcast > +}; > + > +static struct verbs_context *rfc_alloc_context(struct ibv_device *ibdev, > + int cmd_fd) > +{ > + struct rfc_context *context; > + struct ibv_get_context cmd; > + struct ib_uverbs_get_context_resp resp; > + > + 
context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx, > + RDMA_DRIVER_RXE); > + if (!context) > + return NULL; > + > + if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, > + sizeof cmd, &resp, sizeof resp)) > + goto out; > + > + verbs_set_ops(&context->ibv_ctx, &rfc_ctx_ops); > + > + return &context->ibv_ctx; > + > +out: > + verbs_uninit_context(&context->ibv_ctx); > + free(context); > + return NULL; > +} > + > +static void rfc_free_context(struct ibv_context *ibctx) > +{ > + struct rfc_context *context = to_rctx(ibctx); > + > + verbs_uninit_context(&context->ibv_ctx); > + free(context); > +} > + > +static void rfc_uninit_device(struct verbs_device *verbs_device) > +{ > + struct rfc_device *dev = to_rdev(&verbs_device->device); > + > + free(dev); > +} > + > +static struct verbs_device *rfc_device_alloc(struct verbs_sysfs_dev *sysfs_dev) > +{ > + struct rfc_device *dev; > + dev = calloc(1, sizeof(*dev)); > + if (!dev) > + return NULL; > + > + dev->abi_version = sysfs_dev->abi_ver; > + > + return &dev->ibv_dev; > +} > + > +static const struct verbs_device_ops rfc_dev_ops = { > + .name = "rfc", > + /* > + * For 64 bit machines ABI version 1 and 2 are the same. Otherwise 32 > + * bit machines require ABI version 2 which guarentees the user and > + * kernel use the same ABI. > + */ > + .match_min_abi_version = sizeof(void *) == 8?1:2, > + .match_max_abi_version = 2, > + .match_table = hca_table, > + .alloc_device = rfc_device_alloc, > + .uninit_device = rfc_uninit_device, > + .alloc_context = rfc_alloc_context, > + .free_context = rfc_free_context, > +}; > +PROVIDER_DRIVER(rfc_dev_ops); > diff --git a/providers/rfc/rfc.h b/providers/rfc/rfc.h > new file mode 100644 > index 0000000..8313b19 > --- /dev/null > +++ b/providers/rfc/rfc.h > @@ -0,0 +1,129 @@ > +/* > + * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved. > + * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved. > + * Copyright (c) 2006-2007 QLogic Corp. 
All rights reserved. > + * Copyright (c) 2005. PathScale, Inc. All rights reserved. Don't you want to add Broadcom here? > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > + */ > + > +#ifndef RXE_H > +#define RXE_H RXE? > + > +#include <infiniband/driver.h> > +#include <sys/socket.h> > +#include <netinet/in.h> > +#include <rdma/rdma_user_rfc.h> /* struct rfc_av */ > +#include "rfc-abi.h" > + > +enum rdma_network_type { > + RDMA_NETWORK_IB, > + RDMA_NETWORK_IPV4, > + RDMA_NETWORK_IPV6 > +}; If both RXE and RFC uses it, can we move it to generic place? 
> + > +struct rfc_device { > + struct verbs_device ibv_dev; > + int abi_version; > +}; > + > +struct rfc_context { > + struct verbs_context ibv_ctx; > +}; > + > +struct rfc_cq { > + struct ibv_cq ibv_cq; > + struct mminfo mmap_info; > + struct rfc_queue *queue; > + pthread_spinlock_t lock; > +}; > + > +struct rfc_ah { > + struct ibv_ah ibv_ah; > + struct rfc_av av; > +}; > + > +struct rfc_wq { > + struct rfc_queue *queue; > + pthread_spinlock_t lock; > + unsigned int max_sge; > + unsigned int max_inline; > +}; > + > +struct rfc_qp { > + struct ibv_qp ibv_qp; > + struct mminfo rq_mmap_info; > + struct rfc_wq rq; > + struct mminfo sq_mmap_info; > + struct rfc_wq sq; > + unsigned int ssn; > +}; > + > +#define qp_type(qp) ((qp)->ibv_qp.qp_type) > + > +struct rfc_srq { > + struct ibv_srq ibv_srq; > + struct mminfo mmap_info; > + struct rfc_wq rq; > + uint32_t srq_num; > +}; > + > +#define to_rxxx(xxx, type) container_of(ib##xxx, struct rfc_##type, ibv_##xxx) > + > +static inline struct rfc_context *to_rctx(struct ibv_context *ibctx) > +{ > + return container_of(ibctx, struct rfc_context, ibv_ctx.context); > +} > + > +static inline struct rfc_device *to_rdev(struct ibv_device *ibdev) > +{ > + return container_of(ibdev, struct rfc_device, ibv_dev.device); > +} > + > +static inline struct rfc_cq *to_rcq(struct ibv_cq *ibcq) > +{ > + return to_rxxx(cq, cq); > +} > + > +static inline struct rfc_qp *to_rqp(struct ibv_qp *ibqp) > +{ > + return to_rxxx(qp, qp); > +} > + > +static inline struct rfc_srq *to_rsrq(struct ibv_srq *ibsrq) > +{ > + return to_rxxx(srq, srq); > +} > + > +static inline struct rfc_ah *to_rah(struct ibv_ah *ibah) > +{ > + return to_rxxx(ah, ah); > +} > + > +#endif /* RXE_H */ RXE? > diff --git a/providers/rfc/rfc_cfg.in b/providers/rfc/rfc_cfg.in > new file mode 100755 > index 0000000..0a8583d > --- /dev/null > +++ b/providers/rfc/rfc_cfg.in > @@ -0,0 +1,674 @@ > +#!/usr/bin/perl > + > +# * Copyright (c) 2009-2011 Mellanox Technologies Ltd. 
All rights reserved. > +# * Copyright (c) 2009-2011 System Fabric Works, Inc. All rights reserved. Don't you want to add Broadcom here? > +# * > +# * This software is available to you under a choice of one of two > +# * licenses. You may choose to be licensed under the terms of the GNU > +# * General Public License (GPL) Version 2, available from the file > +# * COPYING in the main directory of this source tree, or the > +# * OpenIB.org BSD license below: > +# * > +# * Redistribution and use in source and binary forms, with or > +# * without modification, are permitted provided that the following > +# * conditions are met: > +# * > +# * - Redistributions of source code must retain the above > +# * copyright notice, this list of conditions and the following > +# * disclaimer. > +# * > +# * - Redistributions in binary form must reproduce the above > +# * copyright notice, this list of conditions and the following > +# * disclaimer in the documentation and/or other materials > +# * provided with the distribution. > +# * > +# * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > +# * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > +# * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > +# * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > +# * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > +# * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > +# * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > +# * SOFTWARE. 
> +# > + > +use warnings; > +use strict; > + > +use File::Basename; > +use File::Path qw(make_path); > +use Getopt::Long; > + > +my $help = 0; > +my $no_persist = 0; > +my $debug = 0; > +my $force = 0; > +my $linkonly = 0; > +my $parms = "/sys/module/rdma_rxe/parameters"; > +my $modprobe_opt = ""; > +my $modprobe_checked = "0"; > +my $persistence_path = "@CMAKE_INSTALL_FULL_SHAREDSTATEDIR@/rxe"; > +my $persistence_file = "${persistence_path}/rxe"; > +my $num_persistent = 0; > +my $sys = "/sys/module/rdma_rxe/parameters"; > +my %rxe_names; > +my @rxe_array; > +my %eth_names; > +my @eth_list; > +my %eth_driver; > +my %link_state; > +my %link_speed; > +my %eth_mtu; > +my %ipv4_addr; > +my %rxe_mtu; > +my @persistence_array; > +my %persistence_hash; > +my @mlx4_port; > +my @mlx4_ether; > +my @roce_list; > + > +# Read a file and return its contents as a string. > +sub read_file { > + my $filename = shift; > + my $result = ""; > + > + if (open(FILE, $filename)) { > + $result = <FILE>; > + close FILE; > + } > + return $result; > +} > + > +#get mapping between rxe and eth devices > +sub get_names { > + my $i = 0; > + > + foreach my $rxe (glob("/sys/class/infiniband/rxe*")) { > + $rxe = basename($rxe); > + my $eth = read_file("/sys/class/infiniband/$rxe/parent"); > + chomp($eth); > + > + if (($eth =~ /[\w]+[\d]/) > + && ($rxe =~ /rxe[0123456789]/)) { > + > + # hash ethername to rxename > + $rxe_names{$eth} = $rxe; > + $rxe_array[$i++] = $rxe; > + > + # hash rxename to ethername > + $eth_names{$rxe} = $eth; > + } > + } > +} > + > +# get list of Mellanox RoCE ports > +sub get_mlx4_list { > + my $i = 0; > + > + foreach my $mlx4 (glob("/sys/class/infiniband/mlx4_*")) { > + $mlx4 = basename($mlx4); > + foreach my $port (glob("/sys/class/infiniband/$mlx4/ports/*")) { > + $port = basename($port); > + my $link = read_file("$port/link_layer"); > + chomp($link); > + > + if ($link =~ "Ethernet") { > + $roce_list[$i++] = "$mlx4:$port"; > + } > + } > + } > +} > + > +#collect per device 
information > +sub get_dev_info { > + my @list; > + my @fields; > + my @lines; > + my $line; > + my $eth; > + my $drv; > + my $np; > + my $i = 0; > + my $j = 0; > + > + get_mlx4_list(); > + > + my @my_eth_list = (); > + foreach my $my_eth_dev (glob("/sys/class/net/*")) { > + $my_eth_dev = basename($my_eth_dev); > + my $my_dev_type = read_file("/sys/class/net/${my_eth_dev}/type"); > + chomp($my_dev_type); > + if ($my_dev_type == "1") { > + push(@my_eth_list, "$my_eth_dev"); > + } > + } > + > + @list = @my_eth_list; > + foreach $eth (@list) { > + chomp($eth); > + > + $eth_list[$i++] = $eth; > + > + @lines = `ethtool -i $eth`; > + foreach $line (@lines) { > + chomp($line); > + > + @fields = split(/\s+/, $line); > + chomp($fields[0]); > + > + if ($fields[0] =~ /driver:/) { > + $drv = $fields[1]; > + $eth_driver{$eth} = $drv; > + > + if ($drv =~ /mlx4_en/ && scalar(@roce_list) > 0 ) { > + $eth_names{$roce_list[$j++]} = $eth; > + } > + } > + } > + > + # get link status > + $link_state{$eth} = ""; > + $link_speed{$eth} = ""; > + > + @lines = `ethtool $eth`; > + foreach $line (@lines) { > + chomp($line); > + > + @fields = split(/:/, $line); > + if (defined($fields[1])) { > + $fields[1] =~ s/^\s+//g; > + if ($fields[0] =~ "Link detected") { > + $link_state{$eth} = $fields[1]; > + } > + } > + elsif ($line =~ "10000baseT") { > + $link_speed{$eth} = "10GigE"; > + } > + } > + > + $ipv4_addr{$eth} = " "; > + $eth_mtu{$eth} = ""; > + > + @lines = `ifconfig $eth`; > + foreach $line (@lines) { > + # get IP address > + if ($line =~ /inet addr/) { > + $line =~ s/^\s+inet addr://g; > + @fields = split(/\s+/, $line); > + $ipv4_addr{$eth} = $fields[0]; > + } > + > + # get ethernet mtu > + if ($line =~ /MTU:/) { > + $line =~ s/^.*MTU://g; > + @fields = split(/\s+/, $line); > + $eth_mtu{$eth} = $fields[0]; > + } > + } > + } > + > + # get rxe mtu > + foreach my $rxe (@rxe_array) { > + > + @lines = `ibv_devinfo -d $rxe`; > + foreach $line (@lines) { > + if ($line =~ "active_mtu") { > + 
$line =~ s/^\s+active_mtu:\s+//g; > + chomp($line); > + > + $rxe_mtu{$rxe} = $line; > + } > + } > + $rxe_mtu{$rxe} = "(?)" if (!$rxe_mtu{$rxe}); > + } > +} > + > +# return string or the string "###" if string is all whitespace > +sub set_field { > + my $fld = $_[0]; > + > + if (defined($fld) && $fld =~ /\S/) { > + return $fld; > + } else { > + return "###"; > + } > +} > + > +# format status output into fixed width columns > +sub status_print { > + my @fields; > + my $field; > + my @flen = (); > + my $num_fields = 0; > + my $i; > + my $pad; > + my $line; > + > + # one pass to size the columns > + foreach $line (@_) { > + @fields = split(/\s+/, $line); > + $i = 0; > + foreach $field (@fields) { > + if (!defined($flen[$i])) { > + $flen[$i] = length($field); > + } > + else { > + $flen[$i] = max($flen[$i], length($field)); > + } > + $i++; > + } > + > + if ($i > $num_fields) { > + $num_fields = $i; > + } > + } > + > + # one pass to print > + foreach $line (@_) { > + print " "; > + @fields = split(/\s+/, $line); > + for ($i = 0; $i < $num_fields; $i++) { > + if (defined($fields[$i])) { > + $pad = $flen[$i] - length($fields[$i]) + 2; > + } > + else { > + $pad = $flen[$i] + 2; > + } > + if (defined($fields[$i]) && ($fields[$i] ne "###")) { > + print "$fields[$i]"; > + } > + else { > + print " "; > + } > + printf("%*s", $pad, ""); > + } > + print "\n"; > + } > +} > + > +# check driver load status > +sub check_module_status { > + if (-e $sys) { > + return 0; > + } else { > + return 1; > + } > +} > + > +# print driver load status and ethertype for rdma_rxe and rdma_rxe_net > +sub show_module_status { > + print "rdma_rxe module not loaded\n" if (!(-e $sys)); > +} > + > +# print rxe status > +sub do_status { > + my $instance = $_[0]; > + my $ln = 0; > + my @outp; > + my $rxe; > + my $rmtu; > + > + get_names(); > + get_dev_info(); > + show_module_status(); > + > + $outp[$ln++] = "Name\tLink\tDriver\t\tSpeed\tNMTU\tIPv4_addr\tRDEV\tRMTU"; > + > + foreach my $eth (@eth_list) { > + 
> + # handle case where rxe_drivers are not loaded > + if (defined($rxe_names{$eth})) { > + $rxe = $rxe_names{$eth}; > + $rmtu = $rxe_mtu{$rxe}; > + } > + else { > + $rxe = ""; > + $rmtu = ""; > + } > + > + if ((!defined($instance) > + && (($linkonly == 0) || ($link_state{$eth} =~ "yes"))) > + || (defined($instance) && ($rxe =~ "$instance"))) { > + $outp[$ln] = set_field("$eth"); > + $outp[$ln] .= "\t"; > + $outp[$ln] .= set_field("$link_state{$eth}"); > + $outp[$ln] .= "\t"; > + $outp[$ln] .= set_field(exists($eth_driver{$eth}) ? $eth_driver{$eth} : ""); > + $outp[$ln] .= "\t"; > + $outp[$ln] .= set_field("$link_speed{$eth}"); > + $outp[$ln] .= "\t"; > + $outp[$ln] .= set_field("$eth_mtu{$eth}"); > + $outp[$ln] .= "\t"; > + $outp[$ln] .= set_field("$ipv4_addr{$eth}"); > + $outp[$ln] .= "\t"; > + $outp[$ln] .= set_field("$rxe"); > + $outp[$ln] .= "\t"; > + $outp[$ln] .= set_field("$rmtu"); > + $ln++; > + } > + } > + > + status_print(@outp); > +} > + > +# read file containing list of ethernet devices into a list > +sub populate_persistence { > + my $i = 0; > + > + open FILE, $persistence_file; > + while(<FILE>) { > + my $line = $_; > + chomp($line); > + $line =~ s/^\s+//g; > + if ($line =~ /[\w]+[\d]/) { > + # in case we add fields later > + my ($eth, $cruft) = split(/\s+/, $line, 2); > + if ($eth =~ /^[\w]+[\d]/) { > + $persistence_array[$i] = $eth; > + $persistence_hash{$eth} = $i++; > + } > + } > + } > + close FILE; > + > + $num_persistent = $i; > +} > + > +# print out list of ethernet devices to file > +sub commit_persistent { > + my $i; > + my $eth; > + > + open(PF, ">$persistence_file"); > + > + for ($i = 0; $i < $num_persistent; $i++) { > + $eth = $persistence_array[$i]; > + if ($eth =~ /[\w]+[\d]/) { > + print(PF "$persistence_array[$i]\n"); > + } > + } > + > + close(PF); > +} > + > +sub delete_persistent { > + my $eth = $_[0]; > + > + if (defined($persistence_hash{$eth})) { > + $persistence_array[$persistence_hash{$eth}] = ""; > + } > +} > + > +sub 
add_persistent { > + my $eth = $_[0]; > + > + # Is this one already in the persistence list? > + if (!defined($persistence_hash{$eth})) { > + $persistence_array[$num_persistent] = $eth; > + $persistence_hash{$eth} = $num_persistent; > + $num_persistent++; > + } > +} > + > +# add new rxe device to eth if not already up > +sub rxe_add { > + my $eth = $_[0]; > + > + if (!($eth =~ /[\w]+[\d]/)) { > + print "eth_name ($eth) looks bogus\n"; > + return; > + } > + > + if (!defined($rxe_names{$eth})) { > + system("echo '$eth' > $parms/add"); > + } > + if (!$no_persist) { > + add_persistent($eth); > + commit_persistent(); > + } > +} > + > +sub rxe_remove { > + my $arg2 = $_[0]; > + my $rxe; > + my $eth; > + > + print "remove $arg2\n" if ($debug > 0); > + > + if ($arg2 =~ /[\w]+[\d]/) { > + $eth = $arg2; > + $rxe = $rxe_names{$eth}; > + } > + elsif ($arg2 =~ /rxe[0123456789]/) { > + $rxe = $arg2; > + $eth = $eth_names{$rxe}; > + } > + elsif ($arg2 eq "all") { > + $rxe = "all"; > + } > + > + if (($rxe eq "all") || ($rxe =~ /^rxe[0123456789]/)) { > + my $cmd = "echo '$rxe' > $parms/remove"; > + #print "$cmd\n"; > + system($cmd); > + if (!$no_persist) { > + if ($rxe eq "all") { > + unlink($persistence_file); > + } > + elsif ($eth =~/[\w]+[\d]/) { > + delete_persistent($eth); > + commit_persistent(); > + } > + else { > + print "Warning: Unable to resolve ethname; " > + . 
"instance may persist on restart\n"; > + } > + } > + } > + else { > + print "rxe instance $rxe not found\n"; > + } > +} > + > +sub get_devinfo { > + my $rxe = $_[0]; > + > + my $cmd = "ibv_devinfo -d $rxe"; > + return `$cmd`; > +} > + > +# allow unsupported modules to load in SLES11 if allowed > +sub modprobe { > + my $module = $_[0]; > + my $opts = $_[1]; > + my @lines; > + my $line; > + > + if ($modprobe_checked == "0") { > + @lines = `modprobe -c`; > + foreach $line (@lines) { > + if ($line =~ /^allow_unsupported_modules *0/) { > + $modprobe_opt = " --allow-unsupported-modules "; > + last; > + } > + } > + $modprobe_checked = "1"; > + } > + > + if (!defined($opts)) { > + $opts = ""; > + } > + > + system("modprobe $modprobe_opt $module $opts"); > +} > + > +# bring up rxe > +sub do_start { > + my $proto_str = ""; > + > + system("mkdir -p $persistence_path"); > + system("touch $persistence_file"); > + > + modprobe("ib_core"); > + modprobe("ib_uverbs"); > + modprobe("rdma_ucm"); > + modprobe("rdma_rxe"); > + > + populate_persistence(); > + > + foreach my $eth (@persistence_array) { > + rxe_add($eth); > + } > + > + get_names(); > + > + foreach my $rxe (@rxe_array) { > + my $stat = get_devinfo($rxe); > + if ($stat =~ "PORT_DOWN") { > + my $cmd = "ifconfig $eth_names{$rxe} up"; > + system($cmd); > + } > + } > + > +} > + > +# check if argument is an integer > +sub is_integer { > + defined $_[0] && $_[0] =~ /^[+-]?\d+$/; > +} > + > +# remove all rxe devices and unload drivers rxe? > +sub do_stop { > + my $rxe; rxe? > + > + foreach $rxe (@rxe_array) { > + system("echo '$rxe' > $sys/remove"); etc... 
> + } > + > + if (-e $sys) { > + system("rmmod rdma_rxe"); > + } > + > + if (-e $sys) { > + print "unable to unload drivers, reboot required\n"; > + } > +} > + > +sub do_debug { > + my $arg2 = $_[0]; > + my $debugfile = "$parms/debug"; > + chomp($arg2); > + > + if (!(-e "$debugfile")) { > + print "Error: debug is compiled out of this rxe driver\n"; > + return; > + } > + > + if ($arg2 eq "on") { system("echo '31' > $debugfile"); } > + elsif ($arg2 eq "off") { system("echo '0' > $debugfile"); } > + elsif ($arg2 eq "0") { system("echo '0' > $debugfile"); } > + elsif ($arg2 eq "") { } > + elsif ($arg2 ge "0" && $arg2 le "31") { > + system("echo '$arg2' > $debugfile"); > + } > + else { > + print "unrecognized debug cmd ($arg2)\n"; > + } > + > + my $current = read_file($debugfile); > + chomp($current); > + if ($current > 0) { > + print "Debug is ON ($current)\n"; > + } > + elsif ($current == 0) { > + print "Debug is OFF\n"; > + } > + else { > + print "Unrecognized debug value\n"; > + } > +} > + > +sub max { > + my $a = $_[0]; > + my $b = $_[1]; > + return $a if ($a > $b); > + return $b; > +} > + > +# show usage for rxe_cfg > +sub usage { > + print " Usage:\n"; > + print " rxe_cfg [options] start|stop|status|persistent\n"; > + print " rxe_cfg debug on|off|<num>\n"; > + print " rxe_cfg [-n] add <ndev>\n"; > + print " rxe_cfg [-n] remove <ndev>|<rdev>\n"; > + print "\n"; > + print " <ndev> = network device e.g. eth3\n"; > + print " <rdev> = rdma device e.g. 
rxe1\n"; > + print "\n"; > + print " Options:\n"; > + print " -h: print this usage information\n"; > + print " -n: do not make the configuration action persistent\n"; > + print " -v: print additional debug output\n"; > + print " -l: show status for interfaces with link up\n"; > + print " -p <num>: (start command only) - set ethertype\n"; > +} > + > +sub main { > + GetOptions( > + "-h" => \$help, > + "--help" => \$help, > + "-n" => \$no_persist, > + "-v:+" => \$debug, > + "-f" => \$force, > + "-l" => \$linkonly, > + ); > + > + my $arg1 = $ARGV[0]; > + my $arg2 = $ARGV[1]; > + my $arg3 = $ARGV[2]; > + > + # status is the default > + if (!defined($arg1) || ($arg1 =~ /status/)) { > + do_status($arg2); > + exit; > + } > + > + if ($help) { > + usage(); > + exit; > + } > + > + # stuff that does not require modules to be loaded > + if ($arg1 eq "help") { usage(); exit; } > + elsif ($arg1 eq "start") { do_start(); do_status(); exit; } > + elsif ($arg1 eq "persistent") { system("cat $persistence_file"); exit; } > + > + > + # can't do much else, bail if modules aren't loaded > + if (check_module_status()) { > + exit; > + } > + > + # create persistence file if necessary > + make_path($persistence_path); > + if (!(-e $persistence_file)) { > + `touch $persistence_file`; > + } > + > + # Get full context of the configuration > + populate_persistence(); > + get_names(); > + get_dev_info(); > + > + # Stuff that requires the rdma_rxe module to be loaded > + if ($arg1 eq "stop") { do_stop(); exit; } > + elsif ($arg1 eq "debug") { do_debug($arg2); exit; } > + elsif ($arg1 eq "add") { rxe_add($arg2); exit; } > + elsif ($arg1 eq "remove") { rxe_remove($arg2); exit; } > + elsif ($arg1 eq "help") { usage(); exit; } > +} > + > +main(); > + > +exit; > diff --git a/providers/rfc/rfc_queue.h b/providers/rfc/rfc_queue.h > new file mode 100644 > index 0000000..a82e223 > --- /dev/null > +++ b/providers/rfc/rfc_queue.h > @@ -0,0 +1,128 @@ > +/* > + * Copyright (c) 2009 Mellanox Technologies Ltd. 
All rights reserved. > + * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved. Add yours... > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the fileA > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > + * > + */ > + > +/* implements a simple circular buffer with sizes a power of 2 */ > + > +#ifndef H_RXE_PCQ > +#define H_RXE_PCQ RXE? 
(I will not comment on that any further; there are probably more instances.) > + > +#include <stdint.h> > +#include <stdatomic.h> > + > +/* MUST MATCH kernel struct rfc_pqc in rfc_queue.h */ > +struct rfc_queue { > + uint32_t log2_elem_size; > + uint32_t index_mask; > + uint32_t pad_1[30]; > + _Atomic(uint32_t) producer_index; > + uint32_t pad_2[31]; > + _Atomic(uint32_t) consumer_index; > + uint32_t pad_3[31]; > + uint8_t data[0]; > +}; > + > +static inline int next_index(struct rfc_queue *q, int index) > +{ > + return (index + 1) & q->index_mask; > +} > + > +static inline int queue_empty(struct rfc_queue *q) > +{ > + /* Must hold consumer_index lock */ > + return ((atomic_load(&q->producer_index) - > + atomic_load_explicit(&q->consumer_index, > + memory_order_relaxed)) & > + q->index_mask) == 0; > +} > + > +static inline int queue_full(struct rfc_queue *q) > +{ > + /* Must hold producer_index lock */ > + return ((atomic_load_explicit(&q->producer_index, > + memory_order_relaxed) + > + 1 - atomic_load(&q->consumer_index)) & > + q->index_mask) == 0; > +} > + > +static inline void advance_producer(struct rfc_queue *q) > +{ > + /* Must hold producer_index lock */ > + atomic_thread_fence(memory_order_release); > + atomic_store( > + &q->producer_index, > + (atomic_load_explicit(&q->producer_index, memory_order_relaxed) + > + 1) & > + q->index_mask); Join the above two lines. > +} > + > +static inline void advance_consumer(struct rfc_queue *q) > +{ > + /* Must hold consumer_index lock */ > + atomic_store( > + &q->consumer_index, > + (atomic_load_explicit(&q->consumer_index, memory_order_relaxed) + > + 1) & > + q->index_mask); Join the above two lines. 
> +} > + > +static inline void *producer_addr(struct rfc_queue *q) > +{ > + /* Must hold producer_index lock */ > + return q->data + ((atomic_load_explicit(&q->producer_index, > + memory_order_relaxed) & > + q->index_mask) > + << q->log2_elem_size); > +} > + > +static inline void *consumer_addr(struct rfc_queue *q) > +{ > + /* Must hold consumer_index lock */ > + return q->data + ((atomic_load_explicit(&q->consumer_index, > + memory_order_relaxed) & > + q->index_mask) > + << q->log2_elem_size); > +} > + > +static inline void *addr_from_index(struct rfc_queue *q, unsigned int index) > +{ > + return q->data + ((index & q->index_mask) > + << q->log2_elem_size); > +} > + > +static inline unsigned int index_from_addr(const struct rfc_queue *q, const void *addr) > +{ > + return (((uint8_t *)addr - q->data) >> q->log2_elem_size) & q->index_mask; > +} > + > +#endif /* H_RXE_PCQ */ > -- > 1.8.3.1 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html