[PATCH 1/1] librfc for RDMA over Fibre Channel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Muneendra <muneendra.kumar@xxxxxxxxxxxx>

This patch adds the following:
1. librfc provider library for rdma-core, which acts as user
   level interface for rdma_rfc kernel module.
2. rfc_cfg utility, which helps in loading and configuring
   the rdma_rfc Kernel module.

This patch is inspired by librxe, which provides the library for the Soft RoCE kernel module.

The corresponding kernel module (rdma_rfc) changes have been sent for review; the details
are below.

https://marc.info/?l=linux-rdma&m=152404459816049&w=2
Signed-off-by: Muneendra <muneendra.kumar@xxxxxxxxxxxx>
---
 CMakeLists.txt                      |   2 +
 kernel-headers/CMakeLists.txt       |   2 +
 kernel-headers/rdma/rdma_user_rfc.h | 179 +++++++
 providers/rfc/CMakeLists.txt        |   8 +
 providers/rfc/man/CMakeLists.txt    |   4 +
 providers/rfc/man/rfc.7             |  77 +++
 providers/rfc/man/rfc_cfg.8         |  70 +++
 providers/rfc/rfc-abi.h             |  53 +++
 providers/rfc/rfc.c                 | 926 ++++++++++++++++++++++++++++++++++++
 providers/rfc/rfc.h                 | 129 +++++
 providers/rfc/rfc_cfg.in            | 674 ++++++++++++++++++++++++++
 providers/rfc/rfc_queue.h           | 128 +++++
 12 files changed, 2252 insertions(+)
 create mode 100644 kernel-headers/rdma/rdma_user_rfc.h
 create mode 100644 providers/rfc/CMakeLists.txt
 create mode 100644 providers/rfc/man/CMakeLists.txt
 create mode 100644 providers/rfc/man/rfc.7
 create mode 100644 providers/rfc/man/rfc_cfg.8
 create mode 100644 providers/rfc/rfc-abi.h
 create mode 100644 providers/rfc/rfc.c
 create mode 100644 providers/rfc/rfc.h
 create mode 100755 providers/rfc/rfc_cfg.in
 create mode 100644 providers/rfc/rfc_queue.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 10a687c..0256bbd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -502,6 +502,8 @@ add_subdirectory(providers/hfi1verbs)
 add_subdirectory(providers/ipathverbs)
 add_subdirectory(providers/rxe)
 add_subdirectory(providers/rxe/man)
+add_subdirectory(providers/rfc)
+add_subdirectory(providers/rfc/man)
 
 # Binaries
 add_subdirectory(ibacm) # NO SPARSE
diff --git a/kernel-headers/CMakeLists.txt b/kernel-headers/CMakeLists.txt
index 3a526b9..5d280e4 100644
--- a/kernel-headers/CMakeLists.txt
+++ b/kernel-headers/CMakeLists.txt
@@ -22,6 +22,7 @@ publish_internal_headers(rdma
   rdma/rdma_user_ioctl.h
   rdma/rdma_user_ioctl_cmds.h
   rdma/rdma_user_rxe.h
+  rdma/rdma_user_rfc.h
   rdma/vmw_pvrdma-abi.h
   )
 
@@ -69,6 +70,7 @@ rdma_kernel_provider_abi(
   rdma/ocrdma-abi.h
   rdma/qedr-abi.h
   rdma/rdma_user_rxe.h
+  rdma/rdma_user_rfc.h
   rdma/vmw_pvrdma-abi.h
   )
 
diff --git a/kernel-headers/rdma/rdma_user_rfc.h b/kernel-headers/rdma/rdma_user_rfc.h
new file mode 100644
index 0000000..8c6b10d
--- /dev/null
+++ b/kernel-headers/rdma/rdma_user_rfc.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *	- Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *	- Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Include guard named after this file, not after the RXE header it was derived from */
+#ifndef RDMA_USER_RFC_H
+#define RDMA_USER_RFC_H
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+
+union rfc_gid {
+	__u8	raw[16];
+	struct {
+		__be64	subnet_prefix;
+		__be64	interface_id;
+	} global;
+};
+
+struct rfc_global_route {
+	union rfc_gid	dgid;
+	__u32		flow_label;
+	__u8		sgid_index;
+	__u8		hop_limit;
+	__u8		traffic_class;
+};
+
+struct rfc_av {
+	__u8			port_num;
+	__u8			network_type;
+	__u16			reserved1;
+	__u32			reserved2;
+	struct rfc_global_route	grh;
+	union {
+		struct sockaddr_in	_sockaddr_in;
+		struct sockaddr_in6	_sockaddr_in6;
+	} sgid_addr, dgid_addr;
+};
+
+struct rfc_send_wr {
+	__aligned_u64		wr_id;
+	__u32			num_sge;
+	__u32			opcode;
+	__u32			send_flags;
+	union {
+		__be32		imm_data;
+		__u32		invalidate_rkey;
+	} ex;
+	union {
+		struct {
+			__aligned_u64 remote_addr;
+			__u32	rkey;
+			__u32	reserved;
+		} rdma;
+		struct {
+			__aligned_u64 remote_addr;
+			__aligned_u64 compare_add;
+			__aligned_u64 swap;
+			__u32	rkey;
+			__u32	reserved;
+		} atomic;
+		struct {
+			__u32	remote_qpn;
+			__u32	remote_qkey;
+			__u16	pkey_index;
+		} ud;
+		/* reg is only used by the kernel and is not part of the uapi */
+		struct {
+			union {
+				struct ib_mr *mr;
+				__aligned_u64 reserved;
+			};
+			__u32        key;
+			__u32        access;
+		} reg;
+	} wr;
+};
+
+struct rfc_sge {
+	__aligned_u64 addr;
+	__u32	length;
+	__u32	lkey;
+};
+
+struct mminfo {
+	__aligned_u64  		offset;
+	__u32			size;
+	__u32			pad;
+};
+
+/*
+ * DMA bookkeeping placed as the last member of rfc_send_wqe and
+ * rfc_recv_wqe.  The two zero-length arrays in the trailing union overlay
+ * whatever follows the fixed fields: either raw inline bytes or an array
+ * of scatter/gather entries.
+ */
+struct rfc_dma_info {
+	__u32			length;
+	__u32			resid;
+	__u32			cur_sge;
+	__u32			num_sge;
+	__u32			sge_offset;
+	__u32			reserved;
+	union {
+		__u8		inline_data[0];
+		struct rfc_sge	sge[0];
+	};
+};
+
+struct rfc_send_wqe {
+	struct rfc_send_wr	wr;
+	struct rfc_av		av;
+	__u32			status;
+	__u32			state;
+	__aligned_u64		iova;
+	__u32			mask;
+	__u32			first_psn;
+	__u32			last_psn;
+	__u32			ack_length;
+	__u32			ssn;
+	__u32			has_rd_atomic;
+	struct rfc_dma_info	dma;
+};
+
+struct rfc_recv_wqe {
+	__aligned_u64		wr_id;
+	__u32			num_sge;
+	__u32			padding;
+	struct rfc_dma_info	dma;
+};
+
+struct rfc_create_cq_resp {
+	struct mminfo mi;
+};
+
+struct rfc_resize_cq_resp {
+	struct mminfo mi;
+};
+
+struct rfc_create_qp_resp {
+	struct mminfo rq_mi;
+	struct mminfo sq_mi;
+};
+
+struct rfc_create_srq_resp {
+	struct mminfo mi;
+	__u32 srq_num;
+	__u32 reserved;
+};
+
+struct rfc_modify_srq_cmd {
+	__aligned_u64 mmap_info_addr;
+};
+
+#endif /* rdma_user_rfc.h */
diff --git a/providers/rfc/CMakeLists.txt b/providers/rfc/CMakeLists.txt
new file mode 100644
index 0000000..3123311
--- /dev/null
+++ b/providers/rfc/CMakeLists.txt
@@ -0,0 +1,8 @@
+rdma_provider(rfc
+  rfc.c
+  )
+rdma_subst_install(FILES "rfc_cfg.in"
+  RENAME "rfc_cfg"
+  DESTINATION "${CMAKE_INSTALL_BINDIR}"
+  PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE
+  )
diff --git a/providers/rfc/man/CMakeLists.txt b/providers/rfc/man/CMakeLists.txt
new file mode 100644
index 0000000..145855c
--- /dev/null
+++ b/providers/rfc/man/CMakeLists.txt
@@ -0,0 +1,4 @@
+rdma_man_pages(
+  rfc.7
+  rfc_cfg.8
+)
diff --git a/providers/rfc/man/rfc.7 b/providers/rfc/man/rfc.7
new file mode 100644
index 0000000..594d6cd
--- /dev/null
+++ b/providers/rfc/man/rfc.7
@@ -0,0 +1,77 @@
+.\" -*- nroff -*-
+.\"
+.TH RFC 7 2011-06-29 1.0.0
+.SH "NAME"
+rfc \- Software RDMA over FC
+.SH "SYNOPSIS"
+\fBmodprobe rdma_rfc\fR
+.br
+This is usually performed by a configuration utility (see \fBrfc_cfg\fR(8).)
+
+.SH "DESCRIPTION"
+The rdma_rfc kernel module provides a software implementation of RDMA over
+Fibre Channel. It encapsulates RDMA payloads in FC-NVMe READ/WRITE requests
+and sends them over Fibre Channel fabrics.
+The InfiniBand (IB) Base Transport Header (BTH) is encapsulated in the FC-NVMe
+header.
+
+Once an RFC instance has been created, communicating via RFC is the same as
+communicating via any OFED-compatible InfiniBand HCA, albeit in some cases with
+addressing implications.
+
+Verbs applications written over IB verbs should work seamlessly except for the
+following constraints in the current release:
+1. Partitioning is not supported. The RFC module ignores any partition key in the BTH.
+2. Inline and immediate data sizes >= 64KB are not supported.
+3. Only Reliable Connection (RC) and Unreliable Datagram (UD) queue pair types
+   are supported.
+
+.SH "FILES"
+.TP
+\fB/sys/class/infiniband/rfc[0,1,...]\fR
+Directory that holds RDMA device information. The format is the same as other RDMA devices.
+
+.TP
+\fB/sys/module/rdma_rfc_net/parameters/add\fR
+Write only file used by \fBrfc_cfg(8)\fR to add new RFC devices to existing Ethernet devices.
+
+.TP
+\fB/sys/module/rdma_rfc_net/parameters/remove\fR
+Write only file used by \fBrfc_cfg(8)\fR to remove RFC devices.
+
+.TP
+\fB/sys/module/rdma_rfc/parameters/max_qp\fR
+Read/Write file that sets a limit on the number of QPs allowed per RFC device.
+
+.TP
+\fB/sys/module/rdma_rfc/parameters/max_qp_wr\fR
+Read/Write file that sets a limit on the number of WRs per QP allowed per RFC device.
+
+.TP
+\fB/sys/module/rdma_rfc/parameters/max_mr\fR
+Read/Write file that sets a limit on the number of MRs allowed per RFC device.
+
+.TP
+\fB/sys/module/rdma_rfc/parameters/max_fmr\fR
+Read/Write file that sets a limit on the number of FMRs allowed per RFC device.
+
+.TP
+\fB/sys/module/rdma_rfc/parameters/max_cq\fR
+Read/Write file that sets a limit on the number of CQs allowed per RFC device.
+
+.TP
+\fB/sys/module/rdma_rfc/parameters/max_log_cqe\fR
+Read/Write file that sets a limit on the log base 2 of the number of CQEs per CQ allowed per RFC device.
+
+.TP
+\fB/sys/module/rdma_rfc/parameters/max_inline_data\fR
+Read/Write file that sets a limit on the maximum amount of inline data per WR allowed per RFC device.
+
+The above configuration parameters only affect a new RFC instance when it is created, not afterwards.
+
+.SH "SEE ALSO"
+.BR rfc_cfg (8),
+.BR verbs (7),
+
+.SH "AUTHORS"
+Written by Muneendra Kumar, Anand Sundaram, Amit Tyagi at Broadcom INC.
diff --git a/providers/rfc/man/rfc_cfg.8 b/providers/rfc/man/rfc_cfg.8
new file mode 100644
index 0000000..8c12bbf
--- /dev/null
+++ b/providers/rfc/man/rfc_cfg.8
@@ -0,0 +1,70 @@
+.\" -*- nroff -*-
+.\"
+.TH RFC_CFG 8 2011-06-29 1.0.0
+.SH "NAME"
+rfc_cfg \- rfc configuration tool for RFC (Soft RFC)
+.SH "SYNOPSIS"
+\fBrfc_cfg [status]\fR
+.br
+\fBrfc_cfg start\fR [\fB\-p\fR \fIproto\fR]
+.br
+\fBrfc_cfg stop\fR
+.br
+\fBrfc_cfg persistent\fR
+.br
+\fBrfc_cfg add\fR [\fB\-n\fR] \fIethN\fR
+.br
+\fBrfc_cfg remove\fR [\fB\-n\fR] \fIethN\fR|\fIrfcN\fR
+.br
+.SH "DESCRIPTION"
+rfc_cfg is the configuration tool for the RFC software implementation of the RFC protocol.  
+
+The RFC kernel modules are loaded, configured, reconfigured and unloaded via the various rfc_cfg command options, documented below.
+
+.SH "PARAMETERS"
+.TP
+\fIethN\fR
+Network device name as listed in /sys/class/net. Only RFC Ethernet devices are supported, e.g. rfcnet0.
+
+.TP
+\fIrfcN\fR
+RFC device name as listed in /sys/class/infiniband/. Examples are rfc0 or rfc1.
+
+.SH "COMMANDS"
+.TP
+[\fBstatus\fR]
+The \fBstatus\fR command prints a table of information on available Ethernet devices and configured RFC instances.  The status display is the default if no options are provided.
+
+.TP
+\fBstart\fR [\fB\-p\fR \fIproto\fR]
+The \fBstart\fR command loads the RFC modules and configures any persistent instances.
+
+.TP
+\fBstop\fR
+The \fBstop\fR command unconfigures all RFC instances and attempts to unload the kernel modules.
+
+.TP
+\fBpersistent\fR
+The \fBpersistent\fR command prints the list of Ethernet devices for which a RFC instance is persistently configured.
+
+.TP
+\fBadd\fR [\fB\-n\fR] \fIethN\fR
+The \fBadd\fR command will only configure a RFC instance on RFC Ethernet device \fIrfcnetN\fR (e.g. rfcnet0).  The RFC modules must have already been loaded via \fBrfc_cfg start\fR.
+
+The default behavior is to add \fIrfcnetN\fR to a file of persistent configurations and the same RFC device will be configured the next time that \fBrfc_cfg start\fR is run.  If the \fB-n\fR option is included the device is not added to the persistence file.
+
+.TP
+\fBremove\fR [\fB\-n\fR] \fIethN\fR|\fIrfcN\fR
+The \fBremove\fR command will remove the specified RFC instance.  The parameter must match a currently active rfcnetN or rfcN name.
+
+If the \fB-n\fR option is included, the RFC device will be removed but will stay in the persistent state, so it will be recreated the next time that \fBrfc_cfg start\fR is run.
+
+.SH "FILES"
+.TP
+\fB[PREFIX]/etc/rfc.conf\fR
+RFC configuration file. Contains the list of persistent RFC instances.  All persistent RFC instances can be removed by deleting this file (note this will take effect on the next "rfc_cfg start" -- to remove actively configured instances, you must "rfc_cfg stop").
+
+.SH "SEE ALSO"
+.BR rfc (7),
+.SH "AUTHORS"
+Written by Muneendra Kumar, Anand Sundaram, Amit Tyagi at Broadcom INC.
diff --git a/providers/rfc/rfc-abi.h b/providers/rfc/rfc-abi.h
new file mode 100644
index 0000000..a36a9ef
--- /dev/null
+++ b/providers/rfc/rfc-abi.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/* Include guard named after this file, not after the RXE header it was derived from */
+#ifndef RFC_ABI_H
+#define RFC_ABI_H
+
+#include <infiniband/kern-abi.h>
+#include <rdma/rdma_user_rfc.h>
+#include <kernel-abi/rdma_user_rfc.h>
+
+DECLARE_DRV_CMD(urfc_create_cq, IB_USER_VERBS_CMD_CREATE_CQ,
+		empty, rfc_create_cq_resp);
+DECLARE_DRV_CMD(urfc_create_qp, IB_USER_VERBS_CMD_CREATE_QP,
+		empty, rfc_create_qp_resp);
+DECLARE_DRV_CMD(urfc_create_srq, IB_USER_VERBS_CMD_CREATE_SRQ,
+		empty, rfc_create_srq_resp);
+DECLARE_DRV_CMD(urfc_modify_srq, IB_USER_VERBS_CMD_MODIFY_SRQ,
+		rfc_modify_srq_cmd, empty);
+DECLARE_DRV_CMD(urfc_resize_cq, IB_USER_VERBS_CMD_RESIZE_CQ,
+		empty, rfc_resize_cq_resp);
+
+#endif /* rfc-abi.h */
diff --git a/providers/rfc/rfc.c b/providers/rfc/rfc.c
new file mode 100644
index 0000000..0611bc1
--- /dev/null
+++ b/providers/rfc/rfc.c
@@ -0,0 +1,926 @@
+/*
+ * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved.
+ * Copyright (C) 2006-2007 QLogic Corporation, All rights reserved.
+ * Copyright (c) 2005. PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *	- Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *	- Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <config.h>
+
+#include <endian.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+#include <netinet/in.h>
+#include <sys/mman.h>
+#include <errno.h>
+
+#include <endian.h>
+#include <pthread.h>
+#include <stddef.h>
+
+#include <infiniband/driver.h>
+#include <infiniband/verbs.h>
+
+#include "rfc_queue.h"
+#include "rfc-abi.h"
+#include "rfc.h"
+
+static const struct verbs_match_ent hca_table[] = {
+	/* FIXME: rfc needs a more reliable way to detect the rfc device */
+	VERBS_NAME_MATCH("rfc", NULL),
+	{},
+};
+
+/*
+ * Query device attributes and format the firmware version string.
+ *
+ * Fills @attr through ibv_cmd_query_device() and renders the raw 64-bit
+ * firmware version as "major.minor.sub_minor" into attr->fw_ver.
+ * Returns 0 on success or the error returned by ibv_cmd_query_device().
+ */
+static int rfc_query_device(struct ibv_context *context,
+			    struct ibv_device_attr *attr)
+{
+	struct ibv_query_device cmd;
+	uint64_t raw_fw_ver;
+	unsigned major, minor, sub_minor;
+	int ret;
+
+	ret = ibv_cmd_query_device(context, attr, &raw_fw_ver,
+				   &cmd, sizeof cmd);
+	if (ret)
+		return ret;
+
+	/* bits 47..32 = major, 31..16 = minor, 15..0 = sub-minor */
+	major = (raw_fw_ver >> 32) & 0xffff;
+	minor = (raw_fw_ver >> 16) & 0xffff;
+	sub_minor = raw_fw_ver & 0xffff;
+
+	/* the three fields are unsigned, so print them with %u, not %d */
+	snprintf(attr->fw_ver, sizeof attr->fw_ver,
+		 "%u.%u.%u", major, minor, sub_minor);
+
+	return 0;
+}
+
+/*
+ * Query the attributes of @port.  Thin wrapper that forwards to
+ * ibv_cmd_query_port() and returns its result unchanged.
+ */
+static int rfc_query_port(struct ibv_context *context, uint8_t port,
+			  struct ibv_port_attr *attr)
+{
+	struct ibv_query_port cmd;
+
+	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd);
+}
+
+/*
+ * Allocate a protection domain.
+ *
+ * Returns the new PD, or NULL when either the memory allocation or the
+ * ibv_cmd_alloc_pd() command fails.
+ */
+static struct ibv_pd *rfc_alloc_pd(struct ibv_context *context)
+{
+	struct ib_uverbs_alloc_pd_resp resp;
+	struct ibv_alloc_pd cmd;
+	struct ibv_pd *pd = malloc(sizeof *pd);
+
+	if (pd == NULL)
+		return NULL;
+
+	if (ibv_cmd_alloc_pd(context, pd, &cmd, sizeof cmd,
+			     &resp, sizeof resp) == 0)
+		return pd;
+
+	/* command failed: nothing was created, just drop the shell */
+	free(pd);
+	return NULL;
+}
+
+/*
+ * Release a protection domain.  The PD memory is freed only when
+ * ibv_cmd_dealloc_pd() succeeds; otherwise its error is returned and
+ * the PD is left intact.
+ */
+static int rfc_dealloc_pd(struct ibv_pd *pd)
+{
+	int ret = ibv_cmd_dealloc_pd(pd);
+
+	if (ret)
+		return ret;
+
+	free(pd);
+	return 0;
+}
+
+/*
+ * Register the memory region [@addr, @addr + @length) on @pd.
+ *
+ * Returns the new MR, or NULL when the allocation or the
+ * ibv_cmd_reg_mr() command fails.
+ */
+static struct ibv_mr *rfc_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+				 int access)
+{
+	struct ib_uverbs_reg_mr_resp resp;
+	struct ibv_reg_mr cmd;
+	struct ibv_mr *mr = malloc(sizeof *mr);
+
+	if (mr == NULL)
+		return NULL;
+
+	/* @addr is passed twice: as a pointer and as its integer value */
+	if (ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, mr,
+			   &cmd, sizeof cmd, &resp, sizeof resp) != 0) {
+		free(mr);
+		return NULL;
+	}
+
+	return mr;
+}
+
+/*
+ * Deregister a memory region.  The MR memory is freed only when
+ * ibv_cmd_dereg_mr() reports success; its result is returned as is.
+ */
+static int rfc_dereg_mr(struct ibv_mr *mr)
+{
+	int ret = ibv_cmd_dereg_mr(mr);
+
+	if (!ret)
+		free(mr);
+
+	return ret;
+}
+
+/*
+ * Create a completion queue and map its ring into this process.
+ *
+ * After ibv_cmd_create_cq() succeeds, the ring described by the driver
+ * response (resp.mi) is mmap()ed from the context's command fd.
+ * Returns the new CQ, or NULL on allocation, command or mmap failure.
+ */
+static struct ibv_cq *rfc_create_cq(struct ibv_context *context, int cqe,
+				    struct ibv_comp_channel *channel,
+				    int comp_vector)
+{
+	struct rfc_cq *cq;
+	struct urfc_create_cq_resp resp;
+	int ret;
+
+	cq = malloc(sizeof *cq);
+	if (!cq) {
+		return NULL;
+	}
+
+	ret = ibv_cmd_create_cq(context, cqe, channel, comp_vector,
+				&cq->ibv_cq, NULL, 0,
+				&resp.ibv_resp, sizeof resp);
+	if (ret) {
+		free(cq);
+		return NULL;
+	}
+
+	cq->queue = mmap(NULL, resp.mi.size, PROT_READ | PROT_WRITE, MAP_SHARED,
+			 context->cmd_fd, resp.mi.offset);
+	if ((void *)cq->queue == MAP_FAILED) {
+		/* undo the kernel-side creation before giving up */
+		ibv_cmd_destroy_cq(&cq->ibv_cq);
+		free(cq);
+		return NULL;
+	}
+
+	/* remembered so the ring can be unmapped on resize/destroy */
+	cq->mmap_info = resp.mi;
+	pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
+
+	return &cq->ibv_cq;
+}
+
+/*
+ * Resize a completion queue and remap its shared ring.
+ *
+ * Asks the kernel to resize the CQ, unmaps the old ring and maps the new
+ * one described by the response.  Every update of cq->queue and
+ * cq->mmap_info is made while cq->lock is held, so rfc_poll_cq() (which
+ * takes the same lock) never sees MAP_FAILED as the queue pointer or a
+ * size that does not belong to the current mapping.
+ *
+ * Returns 0 on success, the error from ibv_cmd_resize_cq(), or the errno
+ * of a failed mmap().  After an mmap() failure the CQ has no ring
+ * (queue == NULL, mmap_info.size == 0).
+ */
+static int rfc_resize_cq(struct ibv_cq *ibcq, int cqe)
+{
+	struct rfc_cq *cq = to_rcq(ibcq);
+	struct ibv_resize_cq cmd;
+	struct urfc_resize_cq_resp resp;
+	int ret;
+
+	pthread_spin_lock(&cq->lock);
+
+	ret = ibv_cmd_resize_cq(ibcq, cqe, &cmd, sizeof cmd,
+				&resp.ibv_resp, sizeof resp);
+	if (ret)
+		goto out;
+
+	munmap(cq->queue, cq->mmap_info.size);
+
+	cq->queue = mmap(NULL, resp.mi.size,
+			 PROT_READ | PROT_WRITE, MAP_SHARED,
+			 ibcq->context->cmd_fd, resp.mi.offset);
+	if ((void *)cq->queue == MAP_FAILED) {
+		/* read errno only on failure, right after the failing call */
+		ret = errno;
+		cq->queue = NULL;
+		cq->mmap_info.size = 0;
+		goto out;
+	}
+
+	cq->mmap_info = resp.mi;
+
+out:
+	pthread_spin_unlock(&cq->lock);
+	return ret;
+}
+
+/*
+ * Destroy a completion queue.  The ring is unmapped (if one is mapped)
+ * and the CQ memory freed only after ibv_cmd_destroy_cq() succeeds;
+ * on failure its error is returned and nothing is released.
+ */
+static int rfc_destroy_cq(struct ibv_cq *ibcq)
+{
+	struct rfc_cq *cq = to_rcq(ibcq);
+	int ret = ibv_cmd_destroy_cq(ibcq);
+
+	if (!ret) {
+		if (cq->mmap_info.size != 0)
+			munmap(cq->queue, cq->mmap_info.size);
+		free(cq);
+	}
+
+	return ret;
+}
+
+/*
+ * Poll up to @ne completions from the CQ ring into @wc.
+ *
+ * Entries are copied out of the ring while cq->lock is held; the loop
+ * stops early as soon as the ring is empty.  Returns the number of
+ * entries copied (0..@ne).
+ */
+static int rfc_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
+{
+	struct rfc_cq *cq = to_rcq(ibcq);
+	struct rfc_queue *q;
+	int npolled;
+	uint8_t *src;
+
+	pthread_spin_lock(&cq->lock);
+	q = cq->queue;
+
+	for (npolled = 0; npolled < ne; ++npolled, ++wc) {
+		if (queue_empty(q))
+			break;
+
+		/* acquire fence between the emptiness check and reading the entry */
+		atomic_thread_fence(memory_order_acquire);
+		src = consumer_addr(q);
+		/* the ring slot is copied byte-for-byte into the caller's ibv_wc */
+		memcpy(wc, src, sizeof(*wc));
+		advance_consumer(q);
+	}
+
+	pthread_spin_unlock(&cq->lock);
+	return npolled;
+}
+
+/*
+ * Create a shared receive queue and map its ring into this process.
+ *
+ * Returns the new SRQ, or NULL on allocation, command or mmap failure.
+ */
+static struct ibv_srq *rfc_create_srq(struct ibv_pd *pd,
+				      struct ibv_srq_init_attr *attr)
+{
+	struct rfc_srq *srq;
+	struct ibv_create_srq cmd;
+	struct urfc_create_srq_resp resp;
+	int ret;
+
+	srq = malloc(sizeof *srq);
+	if (srq == NULL) {
+		return NULL;
+	}
+
+	ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr, &cmd, sizeof cmd,
+				 &resp.ibv_resp, sizeof resp);
+	if (ret) {
+		free(srq);
+		return NULL;
+	}
+
+	srq->rq.queue = mmap(NULL, resp.mi.size,
+			     PROT_READ | PROT_WRITE, MAP_SHARED,
+			     pd->context->cmd_fd, resp.mi.offset);
+	if ((void *)srq->rq.queue == MAP_FAILED) {
+		/* undo the kernel-side creation before giving up */
+		ibv_cmd_destroy_srq(&srq->ibv_srq);
+		free(srq);
+		return NULL;
+	}
+
+	/* remembered so the ring can be unmapped on modify/destroy */
+	srq->mmap_info = resp.mi;
+	/* cached limit checked by rfc_post_one_recv() */
+	srq->rq.max_sge = attr->attr.max_sge;
+	pthread_spin_init(&srq->rq.lock, PTHREAD_PROCESS_PRIVATE);
+
+	return &srq->ibv_srq;
+}
+
+/*
+ * Modify a shared receive queue.
+ *
+ * The address of a local struct mminfo is handed to the kernel in the
+ * command (mmap_info_addr).  When IBV_SRQ_MAX_WR is set in @attr_mask the
+ * whole operation runs under the receive-queue lock, and after a
+ * successful command the old ring is unmapped and a new one is mapped
+ * using the size/offset found in that struct.
+ *
+ * Returns 0 on success, the error from ibv_cmd_modify_srq(), or the errno
+ * of a failed mmap() (in which case the SRQ is left without a ring).
+ */
+static int rfc_modify_srq(struct ibv_srq *ibsrq,
+		   struct ibv_srq_attr *attr, int attr_mask)
+{
+	struct rfc_srq *srq = to_rsrq(ibsrq);
+	struct urfc_modify_srq cmd;
+	int rc = 0;
+	struct mminfo mi;
+
+	mi.offset = 0;
+	mi.size = 0;
+
+	/* only a resize replaces the ring, so only a resize needs the lock */
+	if (attr_mask & IBV_SRQ_MAX_WR)
+		pthread_spin_lock(&srq->rq.lock);
+
+	cmd.mmap_info_addr = (__u64)(uintptr_t) & mi;
+	rc = ibv_cmd_modify_srq(ibsrq, attr, attr_mask,
+				&cmd.ibv_cmd, sizeof cmd);
+	if (rc)
+		goto out;
+
+	if (attr_mask & IBV_SRQ_MAX_WR) {
+		(void)munmap(srq->rq.queue, srq->mmap_info.size);
+		srq->rq.queue = mmap(NULL, mi.size,
+				     PROT_READ | PROT_WRITE, MAP_SHARED,
+				     ibsrq->context->cmd_fd, mi.offset);
+
+		if ((void *)srq->rq.queue == MAP_FAILED) {
+			rc = errno;
+			/* size 0 tells rfc_destroy_srq() there is nothing to unmap */
+			srq->rq.queue = NULL;
+			srq->mmap_info.size = 0;
+			goto out;
+		}
+
+		srq->mmap_info = mi;
+	}
+
+out:
+	if (attr_mask & IBV_SRQ_MAX_WR)
+		pthread_spin_unlock(&srq->rq.lock);
+	return rc;
+}
+
+/*
+ * Query the attributes of a shared receive queue.  Thin wrapper that
+ * forwards to ibv_cmd_query_srq() and returns its result unchanged.
+ */
+static int rfc_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr)
+{
+	struct ibv_query_srq cmd;
+
+	return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
+}
+
+/*
+ * Destroy a shared receive queue.  The ring is unmapped (if one is
+ * mapped) and the SRQ memory freed only after ibv_cmd_destroy_srq()
+ * succeeds; on failure its error is returned and nothing is released.
+ */
+static int rfc_destroy_srq(struct ibv_srq *ibvsrq)
+{
+	struct rfc_srq *srq = to_rsrq(ibvsrq);
+	struct rfc_queue *ring = srq->rq.queue;
+	int ret = ibv_cmd_destroy_srq(ibvsrq);
+
+	if (ret)
+		return ret;
+
+	if (srq->mmap_info.size != 0)
+		munmap(ring, srq->mmap_info.size);
+	free(srq);
+
+	return 0;
+}
+
+/*
+ * Write one receive work request into the next free slot of @rq and
+ * publish it.
+ *
+ * Returns 0 on success, -ENOMEM when the ring is full, or -EINVAL when
+ * the request carries more SGEs than rq->max_sge.  Note the error codes
+ * are negative.  Both callers in this file (rfc_post_srq_recv() and
+ * rfc_post_recv()) invoke it with the queue lock held.
+ */
+static int rfc_post_one_recv(struct rfc_wq *rq, struct ibv_recv_wr *recv_wr)
+{
+	int i;
+	struct rfc_recv_wqe *wqe;
+	struct rfc_queue *q = rq->queue;
+	int length = 0;
+	int rc = 0;
+
+	if (queue_full(q)) {
+		rc  = -ENOMEM;
+		goto out;
+	}
+
+	if (recv_wr->num_sge > rq->max_sge) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	wqe = (struct rfc_recv_wqe *)producer_addr(q);
+
+	wqe->wr_id = recv_wr->wr_id;
+	wqe->num_sge = recv_wr->num_sge;
+
+	/* the caller's SGE array is copied verbatim behind the WQE header */
+	memcpy(wqe->dma.sge, recv_wr->sg_list,
+	       wqe->num_sge*sizeof(*wqe->dma.sge));
+
+	/* total receive buffer size = sum of all SGE lengths */
+	for (i = 0; i < wqe->num_sge; i++) {
+		length += wqe->dma.sge[i].length;
+	}
+
+	wqe->dma.length = length;
+	wqe->dma.resid = length;
+	wqe->dma.cur_sge = 0;
+	wqe->dma.num_sge = wqe->num_sge;
+	wqe->dma.sge_offset = 0;
+
+	/* publish the slot only after it is completely filled in */
+	advance_producer(q);
+
+out:
+	return rc;
+}
+
+/*
+ * Post a chain of receive work requests to a shared receive queue.
+ *
+ * Requests are posted in list order under the receive-queue lock.  On the
+ * first failure *@bad_recv_wr is set to the offending request, posting
+ * stops, and the error from rfc_post_one_recv() is returned.  Returns 0
+ * when every request was posted.
+ */
+static int rfc_post_srq_recv(struct ibv_srq *ibvsrq,
+			     struct ibv_recv_wr *recv_wr,
+			     struct ibv_recv_wr **bad_recv_wr)
+{
+	struct rfc_srq *srq = to_rsrq(ibvsrq);
+	struct ibv_recv_wr *wr;
+	int rc = 0;
+
+	pthread_spin_lock(&srq->rq.lock);
+
+	for (wr = recv_wr; wr != NULL; wr = wr->next) {
+		rc = rfc_post_one_recv(&srq->rq, wr);
+		if (rc != 0) {
+			*bad_recv_wr = wr;
+			break;
+		}
+	}
+
+	pthread_spin_unlock(&srq->rq.lock);
+
+	return rc;
+}
+
+/*
+ * Create a queue pair and map its work-queue rings into this process.
+ *
+ * A QP attached to an SRQ (attr->srq set) gets no receive ring of its
+ * own; otherwise the receive ring described by resp.rq_mi is mapped.  The
+ * send ring described by resp.sq_mi is always mapped.
+ *
+ * Returns the new QP, or NULL on allocation, command or mmap failure.
+ */
+static struct ibv_qp *rfc_create_qp(struct ibv_pd *pd,
+				    struct ibv_qp_init_attr *attr)
+{
+	struct ibv_create_qp cmd;
+	struct urfc_create_qp_resp resp;
+	struct rfc_qp *qp;
+	int ret;
+
+	qp = malloc(sizeof *qp);
+	if (!qp) {
+		return NULL;
+	}
+
+	ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd, sizeof cmd,
+				&resp.ibv_resp, sizeof resp);
+	if (ret) {
+		free(qp);
+		return NULL;
+	}
+
+	if (attr->srq) {
+		/* no private receive ring; size 0 marks "nothing to unmap" */
+		qp->rq.max_sge = 0;
+		qp->rq.queue = NULL;
+		qp->rq_mmap_info.size = 0;
+	} else {
+		qp->rq.max_sge = attr->cap.max_recv_sge;
+		qp->rq.queue = mmap(NULL, resp.rq_mi.size, PROT_READ | PROT_WRITE,
+				    MAP_SHARED,
+				    pd->context->cmd_fd, resp.rq_mi.offset);
+		if ((void *)qp->rq.queue == MAP_FAILED) {
+			ibv_cmd_destroy_qp(&qp->ibv_qp);
+			free(qp);
+			return NULL;
+		}
+
+		qp->rq_mmap_info = resp.rq_mi;
+		pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE);
+	}
+
+	qp->sq.max_sge = attr->cap.max_send_sge;
+	qp->sq.max_inline = attr->cap.max_inline_data;
+	qp->sq.queue = mmap(NULL, resp.sq_mi.size, PROT_READ | PROT_WRITE,
+			    MAP_SHARED,
+			    pd->context->cmd_fd, resp.sq_mi.offset);
+	if ((void *)qp->sq.queue == MAP_FAILED) {
+		/* roll back the receive ring mapping, if one was made above */
+		if (qp->rq_mmap_info.size)
+			munmap(qp->rq.queue, qp->rq_mmap_info.size);
+		ibv_cmd_destroy_qp(&qp->ibv_qp);
+		free(qp);
+		return NULL;
+	}
+
+	qp->sq_mmap_info = resp.sq_mi;
+	pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE);
+
+	return &qp->ibv_qp;
+}
+
+/*
+ * Query the attributes of a queue pair.  Thin wrapper that forwards to
+ * ibv_cmd_query_qp() and returns its result unchanged.
+ */
+static int rfc_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+			int attr_mask,
+			struct ibv_qp_init_attr *init_attr)
+{
+	struct ibv_query_qp cmd;
+
+	return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr,
+				&cmd, sizeof cmd);
+}
+
+/*
+ * Modify a queue pair.  Thin wrapper that forwards to
+ * ibv_cmd_modify_qp() with a zero-initialised command buffer and returns
+ * its result unchanged.
+ */
+static int rfc_modify_qp(struct ibv_qp *ibvqp,
+			 struct ibv_qp_attr *attr,
+			 int attr_mask)
+{
+	struct ibv_modify_qp cmd = {};
+
+	return ibv_cmd_modify_qp(ibvqp, attr, attr_mask, &cmd, sizeof cmd);
+}
+
+/*
+ * Destroy a queue pair.  The receive and send rings are unmapped (each
+ * only if it is mapped) and the QP memory freed only after
+ * ibv_cmd_destroy_qp() succeeds; on failure its error is returned and
+ * nothing is released.
+ */
+static int rfc_destroy_qp(struct ibv_qp *ibv_qp)
+{
+	struct rfc_qp *qp = to_rqp(ibv_qp);
+	int ret = ibv_cmd_destroy_qp(ibv_qp);
+
+	if (ret)
+		return ret;
+
+	if (qp->rq_mmap_info.size != 0)
+		munmap(qp->rq.queue, qp->rq_mmap_info.size);
+	if (qp->sq_mmap_info.size != 0)
+		munmap(qp->sq.queue, qp->sq_mmap_info.size);
+	free(qp);
+
+	return 0;
+}
+
+/*
+ * Basic sanity checks for a send work request.
+ *
+ * Rejects (with -EINVAL) a request that has more SGEs than the queue
+ * allows, an atomic request shorter than 8 bytes or whose remote address
+ * is not 8-byte aligned, or an inline request longer than the queue's
+ * inline limit.  Returns 0 when the request passes.
+ */
+static int validate_send_wr(struct rfc_wq *sq, struct ibv_send_wr *ibwr,
+			    unsigned int length)
+{
+	if (ibwr->num_sge > sq->max_sge)
+		return -EINVAL;
+
+	switch (ibwr->opcode) {
+	case IBV_WR_ATOMIC_CMP_AND_SWP:
+	case IBV_WR_ATOMIC_FETCH_AND_ADD:
+		if (length < 8 || (ibwr->wr.atomic.remote_addr & 0x7))
+			return -EINVAL;
+		break;
+	default:
+		break;
+	}
+
+	if ((ibwr->send_flags & IBV_SEND_INLINE) && length > sq->max_inline)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Translate a user-space ibv_send_wr into the rfc_send_wr stored in the
+ * send ring.
+ *
+ * @kwr is zeroed first, then the common fields are copied, followed by
+ * the opcode-specific part of the wr union.  Opcodes listed in the last
+ * case group get no opcode-specific fields.
+ */
+static void convert_send_wr(struct rfc_send_wr *kwr, struct ibv_send_wr *uwr)
+{
+	memset(kwr, 0, sizeof(*kwr));
+
+	kwr->wr_id		= uwr->wr_id;
+	kwr->num_sge		= uwr->num_sge;
+	kwr->opcode		= uwr->opcode;
+	kwr->send_flags		= uwr->send_flags;
+	kwr->ex.imm_data	= uwr->imm_data;
+
+	switch(uwr->opcode) {
+	case IBV_WR_RDMA_WRITE:
+	case IBV_WR_RDMA_WRITE_WITH_IMM:
+	case IBV_WR_RDMA_READ:
+		kwr->wr.rdma.remote_addr	= uwr->wr.rdma.remote_addr;
+		kwr->wr.rdma.rkey		= uwr->wr.rdma.rkey;
+		break;
+
+	/* sends always copy the UD addressing fields, whatever the QP type */
+	case IBV_WR_SEND:
+	case IBV_WR_SEND_WITH_IMM:
+		kwr->wr.ud.remote_qpn		= uwr->wr.ud.remote_qpn;
+		kwr->wr.ud.remote_qkey		= uwr->wr.ud.remote_qkey;
+		break;
+
+	case IBV_WR_ATOMIC_CMP_AND_SWP:
+	case IBV_WR_ATOMIC_FETCH_AND_ADD:
+		kwr->wr.atomic.remote_addr	= uwr->wr.atomic.remote_addr;
+		kwr->wr.atomic.compare_add	= uwr->wr.atomic.compare_add;
+		kwr->wr.atomic.swap		= uwr->wr.atomic.swap;
+		kwr->wr.atomic.rkey		= uwr->wr.atomic.rkey;
+		break;
+
+	/* nothing beyond the common fields is copied for these opcodes */
+	case IBV_WR_LOCAL_INV:
+	case IBV_WR_BIND_MW:
+	case IBV_WR_SEND_WITH_INV:
+	case IBV_WR_TSO:
+		break;
+	}
+}
+
+/*
+ * Fill in a send WQE from a user work request.
+ *
+ * @length is the total payload length already computed by the caller.
+ * @sq is not used by this function.  Consumes one send sequence number
+ * from @qp (qp->ssn is post-incremented).  Always returns 0.
+ */
+static int init_send_wqe(struct rfc_qp *qp, struct rfc_wq *sq,
+		  struct ibv_send_wr *ibwr, unsigned int length,
+		  struct rfc_send_wqe *wqe)
+{
+	int num_sge = ibwr->num_sge;
+	int i;
+	unsigned int opcode = ibwr->opcode;
+
+	convert_send_wr(&wqe->wr, ibwr);
+
+	/* UD requests carry their own address vector, taken from the AH */
+	if (qp_type(qp) == IBV_QPT_UD)
+		memcpy(&wqe->av, &to_rah(ibwr->wr.ud.ah)->av,
+		       sizeof(struct rfc_av));
+
+	if (ibwr->send_flags & IBV_SEND_INLINE) {
+		uint8_t *inline_data = wqe->dma.inline_data;
+
+		/* inline: gather the payload bytes back-to-back into the WQE */
+		for (i = 0; i < num_sge; i++) {
+			memcpy(inline_data,
+			       (uint8_t *)(long)ibwr->sg_list[i].addr,
+			       ibwr->sg_list[i].length);
+			inline_data += ibwr->sg_list[i].length;
+		}
+	} else
+		/* not inline: store the SGE descriptors themselves */
+		memcpy(wqe->dma.sge, ibwr->sg_list,
+		       num_sge*sizeof(struct ibv_sge));
+
+	/* non-atomic opcodes read the address through the rdma union member */
+	if ((opcode == IBV_WR_ATOMIC_CMP_AND_SWP)
+	    || (opcode == IBV_WR_ATOMIC_FETCH_AND_ADD))
+		wqe->iova	= ibwr->wr.atomic.remote_addr;
+	else
+		wqe->iova	= ibwr->wr.rdma.remote_addr;
+	wqe->dma.length		= length;
+	wqe->dma.resid		= length;
+	wqe->dma.num_sge	= num_sge;
+	wqe->dma.cur_sge	= 0;
+	wqe->dma.sge_offset	= 0;
+	wqe->state		= 0;
+	wqe->ssn		= qp->ssn++;
+
+	return 0;
+}
+
+/*
+ * Build one send WQE in the next free slot of the send queue and
+ * publish it.
+ *
+ * The payload length is the sum of the SGE lengths of @ibwr.  Returns 0
+ * on success, the (negative) error from validate_send_wr(), or -ENOMEM
+ * when the queue is full.  rfc_post_send() calls this with sq->lock held.
+ */
+static int post_one_send(struct rfc_qp *qp, struct rfc_wq *sq,
+			 struct ibv_send_wr *ibwr)
+{
+	int err;
+	struct rfc_send_wqe *wqe;
+	unsigned int length = 0;
+	int i;
+
+	for (i = 0; i < ibwr->num_sge; i++)
+		length += ibwr->sg_list[i].length;
+
+	err = validate_send_wr(sq, ibwr, length);
+	if (err) {
+		printf("validate send failed\n");
+		return err;
+	}
+
+	/*
+	 * Check for room before touching the ring: building the WQE first
+	 * would write into a slot that is never published and would consume
+	 * a send sequence number (qp->ssn) for a request that is rejected.
+	 */
+	if (queue_full(sq->queue))
+		return -ENOMEM;
+
+	wqe = (struct rfc_send_wqe *)producer_addr(sq->queue);
+
+	err = init_send_wqe(qp, sq, ibwr, length, wqe);
+	if (err)
+		return err;
+
+	/* publish the slot only after it is completely filled in */
+	advance_producer(sq->queue);
+
+	return 0;
+}
+
+/*
+ * Send a null post send as a doorbell.
+ *
+ * Writes a POST_SEND command with zero work requests and zero SGEs
+ * directly to the context's command fd.  Returns 0 when the whole command
+ * was written, otherwise the current errno value.
+ */
+static int post_send_db(struct ibv_qp *ibqp)
+{
+	struct ibv_post_send cmd;
+	struct ib_uverbs_post_send_resp resp;
+
+	cmd.hdr.command	= IB_USER_VERBS_CMD_POST_SEND;
+	/* header sizes are expressed in 32-bit words */
+	cmd.hdr.in_words = sizeof(cmd) / 4;
+	cmd.hdr.out_words = sizeof(resp) / 4;
+	cmd.response	= (uintptr_t)&resp;
+	cmd.qp_handle	= ibqp->handle;
+	cmd.wr_count	= 0;
+	cmd.sge_count	= 0;
+	cmd.wqe_size	= sizeof(struct ibv_send_wr);
+
+	if (write(ibqp->context->cmd_fd, &cmd, sizeof(cmd)) != sizeof(cmd))
+		return errno;
+
+	return 0;
+}
+
+/*
+ * Post a chain of send work requests and then ring the doorbell.
+ *
+ * This API does not make a distinction between restartable and
+ * non-restartable errors.  On failure *bad_wr points at the first work
+ * request that was not queued.
+ */
+static int rfc_post_send(struct ibv_qp *ibqp,
+			 struct ibv_send_wr *wr_list,
+			 struct ibv_send_wr **bad_wr)
+{
+	struct rfc_qp *qp = to_rqp(ibqp);
+	struct rfc_wq *sq = &qp->sq;
+	struct ibv_send_wr *wr;
+	int post_rc = 0;
+	int db_rc;
+
+	if (bad_wr == NULL)
+		return EINVAL;
+
+	*bad_wr = NULL;
+
+	if (sq == NULL || wr_list == NULL || sq->queue == NULL)
+		return EINVAL;
+
+	pthread_spin_lock(&sq->lock);
+	for (wr = wr_list; wr != NULL; wr = wr->next) {
+		post_rc = post_one_send(qp, sq, wr);
+		if (post_rc != 0) {
+			*bad_wr = wr;
+			break;
+		}
+	}
+	pthread_spin_unlock(&sq->lock);
+
+	/* the doorbell is rung even when the chain was only partly queued */
+	db_rc = post_send_db(ibqp);
+	if (db_rc != 0)
+		return db_rc;
+
+	return post_rc;
+}
+
+/*
+ * Post a chain of receive work requests to the QP's receive queue.
+ * On failure *bad_wr points at the first work request that was not queued.
+ */
+static int rfc_post_recv(struct ibv_qp *ibqp,
+			 struct ibv_recv_wr *recv_wr,
+			 struct ibv_recv_wr **bad_wr)
+{
+	struct rfc_qp *qp = to_rqp(ibqp);
+	struct rfc_wq *rq = &qp->rq;
+	struct ibv_recv_wr *wr;
+	int rc = 0;
+
+	if (bad_wr == NULL)
+		return EINVAL;
+
+	*bad_wr = NULL;
+
+	if (rq == NULL || recv_wr == NULL || rq->queue == NULL)
+		return EINVAL;
+
+	pthread_spin_lock(&rq->lock);
+	for (wr = recv_wr; wr != NULL; wr = wr->next) {
+		rc = rfc_post_one_recv(rq, wr);
+		if (rc != 0) {
+			*bad_wr = wr;
+			break;
+		}
+	}
+	pthread_spin_unlock(&rq->lock);
+
+	return rc;
+}
+
+/* Return non-zero when the IPv6 address is an IPv4-mapped address. */
+static inline int ipv6_addr_v4mapped(const struct in6_addr *a)
+{
+	return IN6_IS_ADDR_V4MAPPED(a);
+}
+
+/* Type of the socket-address union used for rfc_av.sgid_addr. */
+typedef typeof(((struct rfc_av *)0)->sgid_addr) sockaddr_union_t;
+
+/*
+ * Convert a GID into the socket address it encodes.
+ *
+ * An IPv4-mapped GID yields an AF_INET address built from the last four
+ * GID bytes; any other GID yields an AF_INET6 address holding all sixteen
+ * bytes.  Always returns 0.
+ */
+static inline int rdma_gid2ip(sockaddr_union_t *out, union ibv_gid *gid)
+{
+	if (ipv6_addr_v4mapped((struct in6_addr *)gid)) {
+		memset(&out->_sockaddr_in, 0, sizeof(out->_sockaddr_in));
+		/* tag the address family, as the IPv6 branch does */
+		out->_sockaddr_in.sin_family = AF_INET;
+		memcpy(&out->_sockaddr_in.sin_addr.s_addr, gid->raw + 12, 4);
+	} else {
+		memset(&out->_sockaddr_in6, 0, sizeof(out->_sockaddr_in6));
+		out->_sockaddr_in6.sin6_family = AF_INET6;
+		memcpy(&out->_sockaddr_in6.sin6_addr.s6_addr, gid->raw, 16);
+	}
+	return 0;
+}
+
+/*
+ * Create an address handle.
+ *
+ * Looks up the source GID selected by attr->grh.sgid_index, fills the
+ * provider-side address vector (port, GRH, network type, source and
+ * destination addresses) and registers the handle with the kernel.
+ *
+ * Returns the new handle, or NULL when the GID query, the allocation or
+ * the kernel command fails.
+ */
+static struct ibv_ah *rfc_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
+{
+	int err;
+	struct rfc_ah *ah;
+	struct rfc_av *av;
+	union ibv_gid sgid;
+	struct ib_uverbs_create_ah_resp resp;
+
+	err = ibv_query_gid(pd->context, attr->port_num, attr->grh.sgid_index,
+			    &sgid);
+	if (err) {
+		fprintf(stderr, "rfc: Failed to query sgid.\n");
+		return NULL;
+	}
+
+	/*
+	 * Zero the whole handle: the address vector is later copied
+	 * byte-for-byte into UD send WQEs, and rdma_gid2ip() only clears
+	 * the union member it fills, so malloc() would leave stale heap
+	 * bytes in the fields and padding that are not assigned below.
+	 */
+	ah = calloc(1, sizeof(*ah));
+	if (ah == NULL)
+		return NULL;
+
+	av = &ah->av;
+	av->port_num = attr->port_num;
+	memcpy(&av->grh, &attr->grh, sizeof(attr->grh));
+	av->network_type =
+		ipv6_addr_v4mapped((struct in6_addr *)attr->grh.dgid.raw) ?
+		RDMA_NETWORK_IPV4 : RDMA_NETWORK_IPV6;
+
+	rdma_gid2ip(&av->sgid_addr, &sgid);
+	rdma_gid2ip(&av->dgid_addr, &attr->grh.dgid);
+
+	memset(&resp, 0, sizeof(resp));
+	if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp, sizeof(resp))) {
+		free(ah);
+		return NULL;
+	}
+
+	return &ah->ibv_ah;
+}
+
+/*
+ * Destroy an address handle.  The provider-side memory is released only
+ * when the kernel command succeeds; otherwise its error is returned.
+ */
+static int rfc_destroy_ah(struct ibv_ah *ibah)
+{
+	struct rfc_ah *ah = to_rah(ibah);
+	int rc = ibv_cmd_destroy_ah(&ah->ibv_ah);
+
+	if (rc == 0)
+		free(ah);
+
+	return rc;
+}
+
+/*
+ * Verbs entry points of this provider; installed on every new context by
+ * rfc_alloc_context() through verbs_set_ops().  Entries named ibv_cmd_*
+ * are the generic command helpers used without a provider wrapper.
+ */
+static const struct verbs_context_ops rfc_ctx_ops = {
+	.query_device = rfc_query_device,
+	.query_port = rfc_query_port,
+	.alloc_pd = rfc_alloc_pd,
+	.dealloc_pd = rfc_dealloc_pd,
+	.reg_mr = rfc_reg_mr,
+	.dereg_mr = rfc_dereg_mr,
+	.create_cq = rfc_create_cq,
+	.poll_cq = rfc_poll_cq,
+	.req_notify_cq = ibv_cmd_req_notify_cq,
+	.resize_cq = rfc_resize_cq,
+	.destroy_cq = rfc_destroy_cq,
+	.create_srq = rfc_create_srq,
+	.modify_srq = rfc_modify_srq,
+	.query_srq = rfc_query_srq,
+	.destroy_srq = rfc_destroy_srq,
+	.post_srq_recv = rfc_post_srq_recv,
+	.create_qp = rfc_create_qp,
+	.query_qp = rfc_query_qp,
+	.modify_qp = rfc_modify_qp,
+	.destroy_qp = rfc_destroy_qp,
+	.post_send = rfc_post_send,
+	.post_recv = rfc_post_recv,
+	.create_ah = rfc_create_ah,
+	.destroy_ah = rfc_destroy_ah,
+	.attach_mcast = ibv_cmd_attach_mcast,
+	.detach_mcast = ibv_cmd_detach_mcast
+};
+
+/*
+ * Allocate and initialise a verbs context for the device and install the
+ * provider's verbs operations on it.  Returns NULL on failure.
+ *
+ * NOTE(review): the driver id passed below is RDMA_DRIVER_RXE; confirm
+ * whether the rfc kernel module is meant to share that id.
+ */
+static struct verbs_context *rfc_alloc_context(struct ibv_device *ibdev,
+					       int cmd_fd)
+{
+	struct ib_uverbs_get_context_resp resp;
+	struct ibv_get_context cmd;
+	struct rfc_context *context;
+
+	context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx,
+					       RDMA_DRIVER_RXE);
+	if (context == NULL)
+		return NULL;
+
+	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof(cmd),
+				&resp, sizeof(resp)) != 0) {
+		/* undo the allocation above */
+		verbs_uninit_context(&context->ibv_ctx);
+		free(context);
+		return NULL;
+	}
+
+	verbs_set_ops(&context->ibv_ctx, &rfc_ctx_ops);
+
+	return &context->ibv_ctx;
+}
+
+/* Tear down a context created by rfc_alloc_context() and free it. */
+static void rfc_free_context(struct ibv_context *ibctx)
+{
+	struct rfc_context *rctx = to_rctx(ibctx);
+
+	verbs_uninit_context(&rctx->ibv_ctx);
+	free(rctx);
+}
+
+/* Release the device structure allocated by rfc_device_alloc(). */
+static void rfc_uninit_device(struct verbs_device *verbs_device)
+{
+	free(to_rdev(&verbs_device->device));
+}
+
+/*
+ * Allocate a zeroed device structure and record the ABI version reported
+ * for the sysfs device.  Returns NULL when the allocation fails.
+ */
+static struct verbs_device *rfc_device_alloc(struct verbs_sysfs_dev *sysfs_dev)
+{
+	struct rfc_device *rdev = calloc(1, sizeof(*rdev));
+
+	if (rdev == NULL)
+		return NULL;
+
+	rdev->abi_version = sysfs_dev->abi_ver;
+
+	return &rdev->ibv_dev;
+}
+
+/* Device-level operations and match rules registered for this provider. */
+static const struct verbs_device_ops rfc_dev_ops = {
+	.name = "rfc",
+	/*
+	 * For 64 bit machines ABI version 1 and 2 are the same. Otherwise 32
+	 * bit machines require ABI version 2 which guarantees the user and
+	 * kernel use the same ABI.
+	 */
+	.match_min_abi_version = sizeof(void *) == 8?1:2,
+	.match_max_abi_version = 2,
+	.match_table = hca_table,
+	.alloc_device = rfc_device_alloc,
+	.uninit_device = rfc_uninit_device,
+	.alloc_context = rfc_alloc_context,
+	.free_context = rfc_free_context,
+};
+PROVIDER_DRIVER(rfc_dev_ops);
diff --git a/providers/rfc/rfc.h b/providers/rfc/rfc.h
new file mode 100644
index 0000000..8313b19
--- /dev/null
+++ b/providers/rfc/rfc.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved.
+ * Copyright (c) 2006-2007 QLogic Corp. All rights reserved.
+ * Copyright (c) 2005. PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *	- Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *	- Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_H
+#define RXE_H
+
+#include <infiniband/driver.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <rdma/rdma_user_rfc.h> /* struct rfc_av */
+#include "rfc-abi.h"
+
+/* Network type stored in the network_type field of an address vector. */
+enum rdma_network_type {
+	RDMA_NETWORK_IB,
+	RDMA_NETWORK_IPV4,
+	RDMA_NETWORK_IPV6
+};
+
+/* Provider device: the generic verbs device plus the kernel ABI version. */
+struct rfc_device {
+	struct verbs_device	ibv_dev;
+	int	abi_version;	/* ABI version reported for the sysfs device */
+};
+
+/* Provider context; currently carries only the generic verbs context. */
+struct rfc_context {
+	struct verbs_context	ibv_ctx;
+};
+
+/* Completion queue: generic CQ plus the provider's queue and its lock. */
+struct rfc_cq {
+	struct ibv_cq		ibv_cq;
+	struct mminfo		mmap_info;	/* presumably the mapping details of the queue - TODO confirm */
+	struct rfc_queue		*queue;
+	pthread_spinlock_t	lock;
+};
+
+/* Address handle: generic AH plus the address vector built at creation. */
+struct rfc_ah {
+	struct ibv_ah		ibv_ah;
+	struct rfc_av		av;	/* copied into the WQE of UD sends */
+};
+
+/* Work queue (send or receive side): the ring, its lock and its limits. */
+struct rfc_wq {
+	struct rfc_queue	*queue;
+	pthread_spinlock_t	lock;		/* held while work requests are posted */
+	unsigned int		max_sge;
+	unsigned int		max_inline;
+};
+
+/* Queue pair: generic QP plus its receive and send work queues. */
+struct rfc_qp {
+	struct ibv_qp		ibv_qp;
+	struct mminfo		rq_mmap_info;
+	struct rfc_wq		rq;
+	struct mminfo		sq_mmap_info;
+	struct rfc_wq		sq;
+	unsigned int		ssn;	/* send sequence number, incremented per send WQE */
+};
+
+/* QP type (IBV_QPT_*) of a struct rfc_qp pointer. */
+#define qp_type(qp)		((qp)->ibv_qp.qp_type)
+
+/* Shared receive queue: generic SRQ plus its receive work queue. */
+struct rfc_srq {
+	struct ibv_srq		ibv_srq;
+	struct mminfo		mmap_info;
+	struct rfc_wq		rq;
+	uint32_t		srq_num;
+};
+
+/*
+ * Map a generic verbs object pointer named ib<xxx> back to the enclosing
+ * struct rfc_<type>, whose embedded member is named ibv_<xxx>.
+ */
+#define to_rxxx(xxx, type) container_of(ib##xxx, struct rfc_##type, ibv_##xxx)
+
+/* Map a generic ibv_context back to the rfc_context that embeds it. */
+static inline struct rfc_context *to_rctx(struct ibv_context *ibctx)
+{
+	return container_of(ibctx, struct rfc_context, ibv_ctx.context);
+}
+
+/* Map a generic ibv_device back to the rfc_device that embeds it. */
+static inline struct rfc_device *to_rdev(struct ibv_device *ibdev)
+{
+	return container_of(ibdev, struct rfc_device, ibv_dev.device);
+}
+
+/* Map a generic ibv_cq back to the rfc_cq that embeds it. */
+static inline struct rfc_cq *to_rcq(struct ibv_cq *ibcq)
+{
+	return container_of(ibcq, struct rfc_cq, ibv_cq);
+}
+
+/* Map a generic ibv_qp back to the rfc_qp that embeds it. */
+static inline struct rfc_qp *to_rqp(struct ibv_qp *ibqp)
+{
+	return container_of(ibqp, struct rfc_qp, ibv_qp);
+}
+
+/* Map a generic ibv_srq back to the rfc_srq that embeds it. */
+static inline struct rfc_srq *to_rsrq(struct ibv_srq *ibsrq)
+{
+	return container_of(ibsrq, struct rfc_srq, ibv_srq);
+}
+
+/* Map a generic ibv_ah back to the rfc_ah that embeds it. */
+static inline struct rfc_ah *to_rah(struct ibv_ah *ibah)
+{
+	return container_of(ibah, struct rfc_ah, ibv_ah);
+}
+
+#endif /* RXE_H */
diff --git a/providers/rfc/rfc_cfg.in b/providers/rfc/rfc_cfg.in
new file mode 100755
index 0000000..0a8583d
--- /dev/null
+++ b/providers/rfc/rfc_cfg.in
@@ -0,0 +1,674 @@
+#!/usr/bin/perl
+
+# * Copyright (c) 2009-2011 Mellanox Technologies Ltd. All rights reserved.
+# * Copyright (c) 2009-2011 System Fabric Works, Inc. All rights reserved.
+# *
+# * This software is available to you under a choice of one of two
+# * licenses.  You may choose to be licensed under the terms of the GNU
+# * General Public License (GPL) Version 2, available from the file
+# * COPYING in the main directory of this source tree, or the
+# * OpenIB.org BSD license below:
+# *
+# *     Redistribution and use in source and binary forms, with or
+# *     without modification, are permitted provided that the following
+# *     conditions are met:
+# *
+# *	- Redistributions of source code must retain the above
+# *	  copyright notice, this list of conditions and the following
+# *	  disclaimer.
+# *
+# *	- Redistributions in binary form must reproduce the above
+# *	  copyright notice, this list of conditions and the following
+# *	  disclaimer in the documentation and/or other materials
+# *	  provided with the distribution.
+# *
+# * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# * SOFTWARE.
+#
+
+use warnings;
+use strict;
+
+use File::Basename;
+use File::Path qw(make_path);
+use Getopt::Long;
+
+# Command-line flags, filled in by GetOptions() in main().
+my $help = 0;
+my $no_persist = 0;
+my $debug = 0;
+my $force = 0;
+my $linkonly = 0;
+# Module parameter directory; note that $sys below names the same path.
+my $parms = "/sys/module/rdma_rxe/parameters";
+# Extra modprobe option and whether it has been probed for yet (see modprobe).
+my $modprobe_opt = "";
+my $modprobe_checked = "0";
+# Location of the list of devices to re-add on "start".
+my $persistence_path = "@CMAKE_INSTALL_FULL_SHAREDSTATEDIR@/rxe";
+my $persistence_file = "${persistence_path}/rxe";
+my $num_persistent = 0;
+my $sys = "/sys/module/rdma_rxe/parameters";
+# Name maps and per-device details collected by get_names()/get_dev_info().
+my %rxe_names;
+my @rxe_array;
+my %eth_names;
+my @eth_list;
+my %eth_driver;
+my %link_state;
+my %link_speed;
+my %eth_mtu;
+my %ipv4_addr;
+my %rxe_mtu;
+# In-memory copy of the persistence file (see populate_persistence).
+my @persistence_array;
+my %persistence_hash;
+my @mlx4_port;
+my @mlx4_ether;
+my @roce_list;
+
+# Read the first line of a file and return it as a string (trailing
+# newline included).  Returns "" when the file cannot be opened or is empty.
+sub read_file {
+    my $filename = shift;
+    my $result = "";
+
+    # lexical handle and three-argument open: no global FILE handle and no
+    # mode characters taken from the file name
+    if (open(my $fh, '<', $filename)) {
+	my $line = <$fh>;
+	$result = $line if (defined($line));
+	close($fh);
+    }
+    return $result;
+}
+
+# Build the two-way mapping between rxe devices and their parent ethernet
+# devices (%rxe_names, %eth_names) and the list of rxe devices (@rxe_array).
+sub get_names {
+    my $count = 0;
+
+    foreach my $path (glob("/sys/class/infiniband/rxe*")) {
+	my $rxe = basename($path);
+	my $eth = read_file("/sys/class/infiniband/$rxe/parent");
+	chomp($eth);
+
+	next unless ($eth =~ /[\w]+[\d]/);
+	next unless ($rxe =~ /rxe[0123456789]/);
+
+	# ethername -> rxename
+	$rxe_names{$eth} = $rxe;
+	$rxe_array[$count] = $rxe;
+	$count++;
+
+	# rxename -> ethername
+	$eth_names{$rxe} = $eth;
+    }
+}
+
+# Collect the Mellanox mlx4 ports whose link layer is Ethernet into
+# @roce_list, as "<device>:<port>" strings.
+sub get_mlx4_list {
+    my $i = 0;
+
+    foreach my $mlx4 (glob("/sys/class/infiniband/mlx4_*")) {
+	$mlx4 = basename($mlx4);
+	foreach my $port_dir (glob("/sys/class/infiniband/$mlx4/ports/*")) {
+	    my $port = basename($port_dir);
+	    # read from the full sysfs path; the bare port number is not a
+	    # usable path on its own
+	    my $link = read_file("$port_dir/link_layer");
+	    $link = "" if (!defined($link));
+	    chomp($link);
+
+	    if ($link =~ "Ethernet") {
+		$roce_list[$i++] = "$mlx4:$port";
+	    }
+	}
+    }
+}
+
+# Collect per-device information.
+#
+# For every network device whose sysfs "type" reads 1 this records the
+# driver (ethtool -i), link state and speed (ethtool), IPv4 address and
+# MTU (ifconfig).  For every known rxe device it records the active MTU
+# reported by ibv_devinfo.  Results go into the file-level hashes.
+sub get_dev_info {
+    my @list;
+    my @fields;
+    my @lines;
+    my $line;
+    my $eth;
+    my $drv;
+    my $np;
+    my $i = 0;
+    my $j = 0;
+
+    get_mlx4_list();
+
+    my @my_eth_list = ();
+    foreach my $my_eth_dev (glob("/sys/class/net/*")) {
+	$my_eth_dev = basename($my_eth_dev);
+        my $my_dev_type = read_file("/sys/class/net/${my_eth_dev}/type");
+	chomp($my_dev_type);
+        if ($my_dev_type == "1") {
+            push(@my_eth_list, "$my_eth_dev");
+        }
+    }
+
+    @list = @my_eth_list;
+    foreach $eth (@list) {
+	chomp($eth);
+
+	$eth_list[$i++] = $eth;
+
+	@lines = `ethtool -i $eth`;
+	foreach $line (@lines) {
+	    chomp($line);
+
+	    @fields = split(/\s+/, $line);
+	    chomp($fields[0]);
+
+	    if ($fields[0] =~ /driver:/) {
+		$drv = $fields[1];
+		$eth_driver{$eth} = $drv;
+
+		# pair each mlx4_en interface with the next RoCE port found
+		if ($drv =~ /mlx4_en/ && scalar(@roce_list) > 0 ) {
+		    $eth_names{$roce_list[$j++]} = $eth;
+		}
+	    }
+	}
+
+	# get link status
+	$link_state{$eth} = "";
+	$link_speed{$eth} = "";
+
+	@lines = `ethtool $eth`;
+	foreach $line (@lines) {
+	    chomp($line);
+
+	    @fields = split(/:/, $line);
+	    if (defined($fields[1])) {
+		    $fields[1] =~ s/^\s+//g;
+		    if ($fields[0] =~ "Link detected") {
+			$link_state{$eth} = $fields[1];
+		    }
+	    }
+	    elsif ($line =~ "10000baseT") {
+		$link_speed{$eth} = "10GigE";
+	    }
+	}
+
+	$ipv4_addr{$eth} = "            ";
+	$eth_mtu{$eth} = "";
+
+	# NOTE(review): the patterns below expect "inet addr:" and "MTU:" in
+	# the ifconfig output; confirm against the ifconfig version in use.
+	@lines = `ifconfig $eth`;
+	foreach $line (@lines) {
+	    # get IP address
+	    if ($line =~ /inet addr/) {
+		$line =~ s/^\s+inet addr://g;
+		@fields = split(/\s+/, $line);
+		$ipv4_addr{$eth} = $fields[0];
+	    }
+
+	    # get ethernet mtu
+	    if ($line =~ /MTU:/) {
+		$line =~ s/^.*MTU://g;
+		@fields = split(/\s+/, $line);
+		$eth_mtu{$eth} = $fields[0];
+	    }
+	}
+    }
+
+    # get rxe mtu
+    foreach my $rxe (@rxe_array) {
+	
+	@lines = `ibv_devinfo -d $rxe`;
+	foreach $line (@lines) {
+	    if ($line =~ "active_mtu") {
+		$line =~ s/^\s+active_mtu:\s+//g;
+		chomp($line);
+
+		$rxe_mtu{$rxe} = $line;
+	    }
+	}
+	# placeholder when ibv_devinfo reported no active_mtu
+	$rxe_mtu{$rxe} = "(?)" if (!$rxe_mtu{$rxe});
+    }
+}
+
+# Return the argument unchanged, or the placeholder "###" when it is
+# undefined or contains nothing but whitespace.
+sub set_field {
+    my ($fld) = @_;
+
+    return "###" unless (defined($fld) && $fld =~ /\S/);
+    return $fld;
+}
+
+# Format status output into fixed width columns.
+#
+# Each argument is one row of whitespace-separated fields.  Column widths
+# are the longest field seen in each position; a field equal to "###"
+# (the set_field placeholder) is printed as blanks.
+sub status_print {
+    my @fields;
+    my $field;
+    my @flen = ();
+    my $num_fields = 0;
+    my $i;
+    my $pad;
+    my $line;
+
+    # one pass to size the columns
+    foreach $line (@_) {
+	@fields = split(/\s+/, $line);
+	$i = 0;
+	foreach $field (@fields) {
+	    if (!defined($flen[$i])) {
+		$flen[$i] = length($field);
+	    }
+	    else {
+		$flen[$i] = max($flen[$i], length($field));
+	    }
+	    $i++;
+	}
+
+	if ($i > $num_fields) {
+	    $num_fields = $i;
+	}
+    }
+
+    # one pass to print
+    foreach $line (@_) {
+	print "  ";
+	@fields = split(/\s+/, $line);
+	for ($i = 0; $i < $num_fields; $i++) {
+	    # pad to the column width plus a two-space gutter
+	    if (defined($fields[$i])) {
+	        $pad = $flen[$i] - length($fields[$i]) + 2;
+	    }
+	    else {
+	        $pad = $flen[$i] + 2;
+	    }
+	    if (defined($fields[$i]) && ($fields[$i] ne "###")) {
+		print "$fields[$i]";
+	    }
+	    else {
+		print "   ";
+	    }
+	    printf("%*s", $pad, "");
+	}
+	print "\n";
+    }
+}
+
+# Check driver load status: returns 0 when the module parameter directory
+# exists, 1 otherwise.
+sub check_module_status {
+    return (-e $sys) ? 0 : 1;
+}
+
+# Print a notice when the rdma_rxe module parameter directory is missing.
+sub show_module_status {
+    if (!(-e $sys)) {
+	print "rdma_rxe module not loaded\n";
+    }
+}
+
+# Print rxe status as a table with one row per ethernet device.
+#
+# With no argument every device is listed (only those with link up when
+# -l was given).  With an argument only devices whose rxe name matches it
+# are listed; the argument is applied as a regular expression.
+sub do_status {
+    my $instance = $_[0];
+    my $ln = 0;
+    my @outp;
+    my $rxe;
+    my $rmtu;
+
+    get_names();
+    get_dev_info();
+    show_module_status();
+
+    $outp[$ln++] = "Name\tLink\tDriver\t\tSpeed\tNMTU\tIPv4_addr\tRDEV\tRMTU";
+
+    foreach my $eth (@eth_list) {
+
+	# handle case where rxe_drivers are not loaded
+	if (defined($rxe_names{$eth})) {
+		$rxe = $rxe_names{$eth};
+		$rmtu = $rxe_mtu{$rxe};
+	}
+	else {
+		$rxe = "";
+		$rmtu = "";
+	}
+
+	if ((!defined($instance) 
+	     && (($linkonly == 0) || ($link_state{$eth} =~ "yes")))
+	    || (defined($instance) && ($rxe =~ "$instance"))) {
+	    # set_field() substitutes a placeholder for empty columns
+	    $outp[$ln] =  set_field("$eth");
+	    $outp[$ln] .= "\t";
+	    $outp[$ln] .= set_field("$link_state{$eth}");
+	    $outp[$ln] .= "\t";
+	    $outp[$ln] .= set_field(exists($eth_driver{$eth}) ? $eth_driver{$eth} : "");
+	    $outp[$ln] .= "\t";
+	    $outp[$ln] .= set_field("$link_speed{$eth}");
+	    $outp[$ln] .= "\t";
+	    $outp[$ln] .= set_field("$eth_mtu{$eth}");
+	    $outp[$ln] .= "\t";
+	    $outp[$ln] .= set_field("$ipv4_addr{$eth}");
+	    $outp[$ln] .= "\t";
+	    $outp[$ln] .= set_field("$rxe");
+	    $outp[$ln] .= "\t";
+	    $outp[$ln] .= set_field("$rmtu");
+	    $ln++;
+	}
+    }
+
+    status_print(@outp);
+}
+
+# Read the persistence file (one ethernet device name per line) into
+# @persistence_array / %persistence_hash and set $num_persistent.
+# A missing or unreadable file is treated as an empty list.
+sub populate_persistence {
+    my $i = 0;
+
+    # checked three-argument open on a lexical handle; reading from a
+    # handle that failed to open only produces warnings
+    if (open(my $fh, '<', $persistence_file)) {
+	while (my $line = <$fh>) {
+	    chomp($line);
+	    $line =~ s/^\s+//g;
+	    next unless ($line =~ /[\w]+[\d]/);
+
+	    # in case we add fields later
+	    my ($eth, $cruft) = split(/\s+/, $line, 2);
+	    if ($eth =~ /^[\w]+[\d]/) {
+		$persistence_array[$i] = $eth;
+		$persistence_hash{$eth} = $i++;
+	    }
+	}
+	close($fh);
+    }
+
+    $num_persistent = $i;
+}
+
+# Write the list of persistent ethernet devices back to the persistence
+# file, one name per line.  Entries blanked by delete_persistent() are
+# skipped.  A file that cannot be opened is reported on STDERR.
+sub commit_persistent {
+    my $pf;
+
+    if (!open($pf, '>', $persistence_file)) {
+	print STDERR "unable to write $persistence_file: $!\n";
+	return;
+    }
+
+    for (my $i = 0; $i < $num_persistent; $i++) {
+	my $eth = $persistence_array[$i];
+	if (defined($eth) && $eth =~ /[\w]+[\d]/) {
+	    print {$pf} "$eth\n";
+	}
+    }
+
+    close($pf);
+}
+
+# Blank the persistence entry of an ethernet device, if it has one.
+sub delete_persistent {
+    my ($eth) = @_;
+    my $slot = $persistence_hash{$eth};
+
+    return unless (defined($slot));
+    $persistence_array[$slot] = "";
+}
+
+# Append an ethernet device to the persistence list unless already listed.
+sub add_persistent {
+    my ($eth) = @_;
+
+    # already in the persistence list: nothing to do
+    return if (defined($persistence_hash{$eth}));
+
+    $persistence_hash{$eth} = $num_persistent;
+    $persistence_array[$num_persistent] = $eth;
+    $num_persistent++;
+}
+
+# Add a new rxe device on top of an ethernet device unless one is already
+# up, and record the device in the persistence file unless -n was given.
+sub rxe_add {
+    my $eth = $_[0];
+
+    # a missing argument is as bogus as a malformed one
+    if (!defined($eth) || !($eth =~ /[\w]+[\d]/)) {
+	$eth = "" if (!defined($eth));
+	print "eth_name ($eth) looks bogus\n";
+	return;
+    }
+
+    if (!defined($rxe_names{$eth})) {
+	# write the parameter file directly rather than through a shell,
+	# so the device name is never interpreted as shell syntax
+	if (open(my $add, '>', "$parms/add")) {
+	    print {$add} "$eth\n";
+	    close($add) or print STDERR "failed to add $eth: $!\n";
+	}
+	else {
+	    print STDERR "unable to open $parms/add: $!\n";
+	}
+    }
+    if (!$no_persist) {
+	add_persistent($eth);
+	commit_persistent();
+    }
+}
+
+# Remove an rxe device.  The argument is an rdma device name (rxeN), the
+# name of the ethernet device it sits on, or "all".  Unless -n was given
+# the persistence file is updated to match.
+sub rxe_remove {
+    my $arg2 = $_[0];
+    my $rxe = "";
+    my $eth = "";
+
+    if (!defined($arg2)) {
+	print "rxe instance  not found\n";
+	return;
+    }
+
+    print "remove $arg2\n"  if ($debug > 0);
+
+    # Test for an rdma device name first: the generic device-name pattern
+    # below also matches "rxeN", which would otherwise be taken for an
+    # ethernet device and never resolve.
+    if ($arg2 eq "all") {
+	$rxe = "all";
+    }
+    elsif ($arg2 =~ /^rxe[0123456789]/) {
+	$rxe = $arg2;
+	$eth = $eth_names{$rxe} if (defined($eth_names{$rxe}));
+    }
+    elsif ($arg2 =~ /[\w]+[\d]/) {
+	$eth = $arg2;
+	$rxe = $rxe_names{$eth} if (defined($rxe_names{$eth}));
+    }
+
+    if (($rxe eq "all") || ($rxe =~ /^rxe[0123456789]/)) {
+	my $cmd = "echo '$rxe' > $parms/remove";
+	#print "$cmd\n";
+	system($cmd);
+	if (!$no_persist) {
+	    if ($rxe eq "all") {
+		unlink($persistence_file);
+	    }
+	    elsif ($eth =~/[\w]+[\d]/) {
+		delete_persistent($eth);
+		commit_persistent();
+	    }
+	    else {
+		print "Warning: Unable to resolve ethname; "
+		    . "instance may persist on restart\n";
+	    }
+	}
+    }
+    else {
+	print "rxe instance $arg2 not found\n";
+    }
+}
+
+# Return the output of "ibv_devinfo -d <rxe>".
+sub get_devinfo {
+    my ($rxe) = @_;
+
+    return `ibv_devinfo -d $rxe`;
+}
+
+# Load a kernel module with optional module options.
+# On the first call the modprobe configuration is scanned once; when it
+# disallows unsupported modules (SLES11) the override flag is added to
+# this and every later modprobe invocation.
+sub modprobe {
+    my ($module, $opts) = @_;
+
+    if ($modprobe_checked == "0") {
+	foreach my $cfg_line (`modprobe -c`) {
+	    next unless ($cfg_line =~ /^allow_unsupported_modules  *0/);
+	    $modprobe_opt = " --allow-unsupported-modules ";
+	    last;
+	}
+	$modprobe_checked = "1";
+    }
+
+    $opts = "" if (!defined($opts));
+
+    system("modprobe $modprobe_opt $module $opts");
+}
+
+# Bring up rxe: make sure the persistence file exists, load the kernel
+# modules, re-add every persisted device and bring up the interface of
+# any rxe device whose port is reported down.
+sub do_start {
+    system("mkdir -p $persistence_path");
+    system("touch $persistence_file");
+
+    foreach my $module ("ib_core", "ib_uverbs", "rdma_ucm", "rdma_rxe") {
+	modprobe($module);
+    }
+
+    populate_persistence();
+
+    foreach my $eth (@persistence_array) {
+	rxe_add($eth);
+    }
+
+    get_names();
+
+    foreach my $rxe (@rxe_array) {
+	next unless (get_devinfo($rxe) =~ "PORT_DOWN");
+	system("ifconfig $eth_names{$rxe} up");
+    }
+}
+
+# True when the argument is defined and is an optionally signed integer.
+sub is_integer {
+    my ($val) = @_;
+
+    return defined($val) && $val =~ /^[+-]?\d+$/;
+}
+
+# Remove all rxe devices and unload the driver; report when the module is
+# still present afterwards.
+sub do_stop {
+    foreach my $dev (@rxe_array) {
+	system("echo '$dev' > $sys/remove");
+    }
+
+    system("rmmod rdma_rxe") if (-e $sys);
+
+    print "unable to unload drivers, reboot required\n" if (-e $sys);
+}
+
+# Set and/or report the driver debug mask.
+# Accepts "on" (31), "off" (0), an integer from 0 to 31, or nothing (report
+# only).  The current value is read back and printed afterwards.
+sub do_debug {
+    my $arg2 = defined($_[0]) ? $_[0] : "";
+    my $debugfile = "$parms/debug";
+    chomp($arg2);
+
+    if (!(-e "$debugfile")) {
+	print "Error: debug is compiled out of this rxe driver\n";
+	return;
+    }
+
+    if    ($arg2 eq "on")  { system("echo '31' > $debugfile"); }
+    elsif ($arg2 eq "off") { system("echo '0'  > $debugfile"); }
+    elsif ($arg2 eq "")    { }
+    # compare numerically: as strings "4" sorts after "31" and "2z" sorts
+    # inside the range
+    elsif ($arg2 =~ /^\d+$/ && $arg2 <= 31) {
+	system("echo '$arg2' > $debugfile");
+    }
+    else {
+	print "unrecognized debug cmd ($arg2)\n";
+    }
+
+    my $current = read_file($debugfile);
+    $current = "" if (!defined($current));
+    chomp($current);
+    if ($current !~ /^-?\d+$/) {
+	print "Unrecognized debug value\n";
+    }
+    elsif ($current > 0) {
+	print "Debug is ON ($current)\n";
+    }
+    elsif ($current == 0) {
+	print "Debug is OFF\n";
+    }
+    else {
+	print "Unrecognized debug value\n";
+    }
+}
+
+# Return the numerically larger of two values.
+sub max {
+    my ($x, $y) = @_;
+
+    return ($x > $y) ? $x : $y;
+}
+
+# show usage for rxe_cfg
+# NOTE(review): the text lists a -p option, but main() registers no such
+# option with GetOptions; confirm which of the two is out of date.
+sub usage {
+    print "  Usage:\n";
+    print "    rxe_cfg [options] start|stop|status|persistent\n";
+    print "    rxe_cfg debug on|off|<num>\n";
+    print "    rxe_cfg [-n] add <ndev>\n";
+    print "    rxe_cfg [-n] remove <ndev>|<rdev>\n";
+    print "\n";
+    print "    <ndev> = network device e.g. eth3\n";
+    print "    <rdev> = rdma device e.g. rxe1\n";
+    print "\n";
+    print "  Options:\n";
+    print "    -h: print this usage information\n";
+    print "    -n: do not make the configuration action persistent\n";
+    print "    -v: print additional debug output\n";
+    print "    -l: show status for interfaces with link up\n";
+    print "    -p <num>: (start command only) - set ethertype\n";
+}
+
+# Parse the command line and dispatch to the requested command.
+# With no command, or with "status", the status table is printed.
+sub main {
+    GetOptions(
+	   "-h"          => \$help,
+	   "--help"      => \$help,
+	   "-n"          => \$no_persist,
+	   "-v:+"        => \$debug,
+	   "-f"          => \$force,
+	   "-l"          => \$linkonly,
+	   );
+
+    my $arg1 = $ARGV[0];
+    my $arg2 = $ARGV[1];
+
+    # Honour -h/--help before the status default; otherwise "-h" given
+    # without a command prints the status table instead of the usage.
+    if ($help) {
+	usage();
+	exit;
+    }
+
+    # status is the default
+    if (!defined($arg1) || ($arg1 =~ /status/)) {
+	do_status($arg2);
+	exit;
+    }
+
+    # stuff that does not require modules to be loaded
+    if    ($arg1 eq "help")       { usage(); exit; }
+    elsif ($arg1 eq "start")      { do_start(); do_status(); exit; }
+    elsif ($arg1 eq "persistent") { system("cat $persistence_file"); exit; }
+
+    # can't do much else, bail if modules aren't loaded
+    if (check_module_status()) {
+	exit;
+    }
+
+    # create persistence file if necessary
+    make_path($persistence_path);
+    if (!(-e $persistence_file)) {
+	`touch $persistence_file`;
+    }
+
+    # Get full context of the configuration
+    populate_persistence();
+    get_names();
+    get_dev_info();
+
+    # Stuff that requires the rdma_rxe module to be loaded
+    if    ($arg1 eq "stop")   { do_stop();	   exit; }
+    elsif ($arg1 eq "debug")  { do_debug($arg2);   exit; }
+    elsif ($arg1 eq "add")    { rxe_add($arg2);    exit; }
+    elsif ($arg1 eq "remove") { rxe_remove($arg2); exit; }
+    else {
+	# an unknown command is no longer silently ignored
+	usage();
+	exit;
+    }
+}
+
+main();
+
+exit;
diff --git a/providers/rfc/rfc_queue.h b/providers/rfc/rfc_queue.h
new file mode 100644
index 0000000..a82e223
--- /dev/null
+++ b/providers/rfc/rfc_queue.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/* implements a simple circular buffer with sizes a power of 2 */
+
+#ifndef H_RXE_PCQ
+#define H_RXE_PCQ
+
+#include <stdint.h>
+#include <stdatomic.h>
+
+/*
+ * MUST MATCH kernel struct rfc_pqc in rfc_queue.h
+ *
+ * Header of a circular buffer whose element slots follow in data[].
+ * The pad arrays make each of the three groups below 32 words long
+ * (128 bytes with 4-byte words), so the producer and consumer indices
+ * each start their own 128-byte region.
+ */
+struct rfc_queue {
+	uint32_t		log2_elem_size;	/* slot size is 1 << log2_elem_size bytes */
+	uint32_t		index_mask;	/* applied to indices so they wrap */
+	uint32_t		pad_1[30];
+	_Atomic(uint32_t)	producer_index;
+	uint32_t		pad_2[31];
+	_Atomic(uint32_t)	consumer_index;
+	uint32_t		pad_3[31];
+	uint8_t			data[0];
+};
+
+/* Return the index that follows 'index', wrapped to the queue size. */
+static inline int next_index(struct rfc_queue *q, int index)
+{
+	int following = index + 1;
+
+	return following & q->index_mask;
+}
+
+/* Return non-zero when producer and consumer indices coincide. */
+static inline int queue_empty(struct rfc_queue *q)
+{
+	/* Must hold consumer_index lock */
+	uint32_t prod = atomic_load(&q->producer_index);
+	uint32_t cons = atomic_load_explicit(&q->consumer_index,
+					     memory_order_relaxed);
+
+	return ((prod - cons) & q->index_mask) == 0;
+}
+
+/*
+ * Return non-zero when advancing the producer would make it meet the
+ * consumer, i.e. no further slot may be published.
+ */
+static inline int queue_full(struct rfc_queue *q)
+{
+	/* Must hold producer_index lock */
+	uint32_t prod = atomic_load_explicit(&q->producer_index,
+					     memory_order_relaxed);
+	uint32_t cons = atomic_load(&q->consumer_index);
+
+	return ((prod + 1 - cons) & q->index_mask) == 0;
+}
+
+/*
+ * Publish the slot at the producer index: a release fence is issued
+ * before the wrapped, incremented index is stored.
+ */
+static inline void advance_producer(struct rfc_queue *q)
+{
+	/* Must hold producer_index lock */
+	uint32_t next;
+
+	atomic_thread_fence(memory_order_release);
+	next = atomic_load_explicit(&q->producer_index, memory_order_relaxed);
+	next = (next + 1) & q->index_mask;
+	atomic_store(&q->producer_index, next);
+}
+
+/* Retire the slot at the consumer index by storing the next index. */
+static inline void advance_consumer(struct rfc_queue *q)
+{
+	/* Must hold consumer_index lock */
+	uint32_t next;
+
+	next = atomic_load_explicit(&q->consumer_index, memory_order_relaxed);
+	next = (next + 1) & q->index_mask;
+	atomic_store(&q->consumer_index, next);
+}
+
+/* Return the address of the slot at the current producer index. */
+static inline void *producer_addr(struct rfc_queue *q)
+{
+	/* Must hold producer_index lock */
+	uint32_t slot = atomic_load_explicit(&q->producer_index,
+					     memory_order_relaxed) &
+			q->index_mask;
+
+	return q->data + (slot << q->log2_elem_size);
+}
+
+/* Return the address of the slot at the current consumer index. */
+static inline void *consumer_addr(struct rfc_queue *q)
+{
+	/* Must hold consumer_index lock */
+	uint32_t slot = atomic_load_explicit(&q->consumer_index,
+					     memory_order_relaxed) &
+			q->index_mask;
+
+	return q->data + (slot << q->log2_elem_size);
+}
+
+/* Return the address of the slot for 'index' (wrapped to the queue size). */
+static inline void *addr_from_index(struct rfc_queue *q, unsigned int index)
+{
+	unsigned int slot = index & q->index_mask;
+
+	return q->data + (slot << q->log2_elem_size);
+}
+
+/* Return the (wrapped) slot index that corresponds to a slot address. */
+static inline unsigned int index_from_addr(const struct rfc_queue *q, const void *addr)
+{
+	const uint8_t *slot = addr;
+
+	return ((slot - q->data) >> q->log2_elem_size) & q->index_mask;
+}
+
+#endif /* H_RXE_PCQ */
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux