[PATCH 01/28] ibtrs: add header shared between ibtrs_client and ibtrs_server

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Jack Wang <jinpu.wang@xxxxxxxxxxxxxxxx>

Signed-off-by: Jack Wang <jinpu.wang@xxxxxxxxxxxxxxxx>
Signed-off-by: Kleber Souza <kleber.souza@xxxxxxxxxxxxxxxx>
Signed-off-by: Danil Kipnis <danil.kipnis@xxxxxxxxxxxxxxxx>
Signed-off-by: Roman Pen <roman.penyaev@xxxxxxxxxxxxxxxx>
---
 include/rdma/ibtrs.h | 514 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 514 insertions(+)
 create mode 100644 include/rdma/ibtrs.h

diff --git a/include/rdma/ibtrs.h b/include/rdma/ibtrs.h
new file mode 100644
index 0000000..4fc572b
--- /dev/null
+++ b/include/rdma/ibtrs.h
@@ -0,0 +1,514 @@
+/*
+ * InfiniBand Transport Layer
+ *
+ * Copyright (c) 2014 - 2017 ProfitBricks GmbH. All rights reserved.
+ * Authors: Fabian Holler <mail@xxxxxxxxxx>
+ *          Jack Wang <jinpu.wang@xxxxxxxxxxxxxxxx>
+ *          Kleber Souza <kleber.souza@xxxxxxxxxxxxxxxx>
+ *          Danil Kipnis <danil.kipnis@xxxxxxxxxxxxxxxx>
+ *          Roman Pen <roman.penyaev@xxxxxxxxxxxxxxxx>
+ *          Milind Dumbare <Milind.dumbare@xxxxxxxxx>
+ *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ */
+
+#ifndef __IBTRS_H
+#define __IBTRS_H
+
+#include <linux/uio.h>
+#include <linux/types.h>
+#include <linux/uuid.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/ib_cm.h>
+#include <linux/list.h>
+#include <linux/dma-direction.h>
+#include <rdma/ib_verbs.h>
+#include <linux/time.h>
+#include <linux/ktime.h>
+#include <linux/timekeeping.h>
+
+#define IBTRS_SERVER_PORT 1234
+#define WC_ARRAY_SIZE 16
+#define IB_APM_TIMEOUT 16 /* 4.096 usec * 2 ^ 16 ~= 268 msec */
+
+#define USR_MSG_CNT 64
+#define USR_CON_BUF_SIZE (USR_MSG_CNT * 2) /* double bufs for ACK's */
+
+#define DEFAULT_HEARTBEAT_TIMEOUT_MS 20000
+#define MIN_HEARTBEAT_TIMEOUT_MS 5000
+#define HEARTBEAT_INTV_MS 500
+#define HEARTBEAT_INTV_JIFFIES msecs_to_jiffies(HEARTBEAT_INTV_MS)
+
+#define MIN_RTR_CNT 1
+#define MAX_RTR_CNT 7
+
+/*
+ * With the current size of the tag allocated on the client, 4K is the maximum
+ * number of tags we can allocate. (see IBNBD-2321)
+ * This number is also used on the client to allocate the IU for the user
+ * connection to receive the RDMA addresses from the server.
+ */
+#define MAX_SESS_QUEUE_DEPTH 4096
+
+#define XX(a) case (a): return #a /* switch case returning the name of a */
+
+#define IBTRS_ADDRLEN sizeof("ipv6:[xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx]")
+
+static inline const char *ib_wc_opcode_str(enum ib_wc_opcode opcode)
+{
+	switch (opcode) {
+	case IB_WC_SEND: return "IB_WC_SEND";
+	case IB_WC_RDMA_WRITE: return "IB_WC_RDMA_WRITE";
+	case IB_WC_RDMA_READ: return "IB_WC_RDMA_READ";
+	case IB_WC_COMP_SWAP: return "IB_WC_COMP_SWAP";
+	case IB_WC_FETCH_ADD: return "IB_WC_FETCH_ADD";
+	/* receive-side (inbound) completions */
+	case IB_WC_RECV: return "IB_WC_RECV";
+	case IB_WC_RECV_RDMA_WITH_IMM: return "IB_WC_RECV_RDMA_WITH_IMM";
+	default: return "IB_WC_OPCODE_UNKNOWN"; /* any opcode not listed */
+	}
+}
+
+
+struct ib_session {	/* see ib_session_init() / ib_session_destroy() */
+	struct ib_pd		*pd;
+	struct ib_mr		*mr;
+	struct ib_event_handler	event_handler;
+};
+
+struct ibtrs_ib_path {	/* pair of GIDs; used for ib_con.pri_path/cur_path */
+	union ib_gid    p_sgid;	/* presumably the source GID - TODO confirm */
+	union ib_gid    p_dgid;	/* presumably the destination GID - TODO confirm */
+};
+
+struct ib_con {		/* see ib_con_init() / ib_con_destroy() */
+	struct ib_qp		*qp ____cacheline_aligned; /* cacheline-aligned */
+	struct ib_cq		*cq ____cacheline_aligned; /* cacheline-aligned */
+	struct ib_send_wr	beacon;	/* NOTE(review): cf. post_beacon() */
+	struct rdma_cm_id	*cm_id;	/* same type as ib_con_init()'s cm_id */
+	struct ibtrs_ib_path    pri_path; /* presumably primary path - confirm */
+	struct ibtrs_ib_path   cur_path; /* presumably current path - confirm */
+	char			*addr;
+	char			*hostname;
+};
+
+struct ibtrs_iu {	/* see ibtrs_iu_alloc() / ibtrs_iu_free() */
+	struct list_head        list;	/* cf. ibtrs_iu_get()/ibtrs_iu_put() */
+	dma_addr_t              dma_addr;
+	void                    *buf;
+	size_t                  size;	/* cf. ibtrs_iu_alloc() size argument */
+	enum dma_data_direction direction;
+	bool			is_msg;	/* cf. ibtrs_iu_alloc() is_msg argument */
+	u32			tag;	/* cf. ibtrs_iu_alloc() tag argument */
+};
+
+struct ibtrs_heartbeat {	/* state used by the ibtrs_heartbeat_*() helpers */
+	atomic64_t	send_ts_ms;	/* cf. ibtrs_heartbeat_set_send_ts() */
+	atomic64_t	recv_ts_ms;	/* cf. ibtrs_set_last_heartbeat() - confirm */
+	u32		timeout_ms;	/* cf. ibtrs_set_heartbeat_timeout() */
+	u32		warn_timeout_ms; /* cf. ibtrs_heartbeat_warn() - confirm */
+	char		*addr;
+	char		*hostname;
+};
+
+#define IBTRS_VERSION 2
+#define IBTRS_UUID_SIZE 16
+#define IO_MSG_SIZE 24
+#define IB_IMM_SIZE_BITS 32
+
+#define GCC_DIAGNOSTIC_AWARE ((__GNUC__ > 6)) /* true for GCC major 7+ */
+#if GCC_DIAGNOSTIC_AWARE /* -Wpadded: warn on padding in the structs below */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic warning "-Wpadded"
+#endif /* GCC_DIAGNOSTIC_AWARE */
+
+/**
+ * enum ibtrs_msg_types - IBTRS message types. DO NOT REMOVE OR REORDER!!!
+ * @IBTRS_MSG_SESS_OPEN:	Client requests new session on Server
+ * @IBTRS_MSG_SESS_OPEN_RESP:	Server informs Client about session parameters
+ * @IBTRS_MSG_CON_OPEN:		Client requests new connection to server
+ * @IBTRS_MSG_RDMA_WRITE:	Client writes data per RDMA to Server
+ * @IBTRS_MSG_REQ_RDMA_WRITE:	Client requests data transfer per RDMA
+ * @IBTRS_MSG_USER:		Data transfer per InfiniBand message
+ * @IBTRS_MSG_ERROR:		Fatal Error happened
+ * @IBTRS_MSG_SESS_INFO:	Session info message carrying the client host name
+ */
+enum ibtrs_msg_types {
+	IBTRS_MSG_SESS_OPEN,
+	IBTRS_MSG_SESS_OPEN_RESP,
+	IBTRS_MSG_CON_OPEN,
+	IBTRS_MSG_RDMA_WRITE,
+	IBTRS_MSG_REQ_RDMA_WRITE,
+	IBTRS_MSG_USER,
+	IBTRS_MSG_ERROR,
+	IBTRS_MSG_SESS_INFO,
+};
+
+/**
+ * struct ibtrs_msg_hdr - Common header of all IBTRS messages
+ * @type:	Message type, valid values see: enum ibtrs_msg_types
+ * @tsize:	Total size of transferred data
+ *
+ * Don't move the first 8 padding bits (@__padding1)! It's a workaround for a
+ * kernel bug, see IBNBD-610 for details.
+ *
+ * DO NOT CHANGE!
+ */
+struct ibtrs_msg_hdr {
+	u8			__padding1;
+	u8			type;
+	u16			__padding2;
+	u32			tsize;
+};
+
+#define IBTRS_HDR_LEN sizeof(struct ibtrs_msg_hdr)
+
+/**
+ * struct ibtrs_msg_sess_open - Opens a new session between client and server
+ * @hdr:	message header
+ * @uuid:	client host identifier, unique until module reload
+ * @ver:	IBTRS protocol version
+ * @con_cnt:    number of connections in this session
+ * @reserved:   reserved fields for future usage; 28 bytes was documented as the
+ *		IPv6/IPv4 maximum, yet 30 are declared - TODO confirm
+ *
+ * DO NOT CHANGE members before ver.
+ */
+struct ibtrs_msg_sess_open {
+	struct ibtrs_msg_hdr	hdr;
+	u8			uuid[IBTRS_UUID_SIZE];
+	u8			ver;
+	u8			con_cnt;
+	u8			reserved[30];
+};
+
+/**
+ * struct ibtrs_msg_sess_info - Session info: carries the client host name
+ * @hdr:		message header
+ * @hostname:		client host name (MAXHOSTNAMELEN bytes)
+ */
+struct ibtrs_msg_sess_info {
+	struct ibtrs_msg_hdr	hdr;
+	u8                      hostname[MAXHOSTNAMELEN];
+};
+
+#define MSG_SESS_INFO_SIZE sizeof(struct ibtrs_msg_sess_info)
+
+/*
+ *  Data Layout in RDMA-Bufs:
+ *
+ * +---------RDMA-BUF--------+
+ * |         Slice N         |
+ * | +---------------------+ |
+ * | |      I/O data       | |
+ * | |---------------------| |
+ * | |      IBNBD MSG      | |
+ * | |---------------------| |
+ * | |      IBTRS MSG      | |
+ * | +---------------------+ |
+ * +-------------------------+
+ * |        Slice N+1        |
+ * | +---------------------+ |
+ * | |      I/O data       | |
+ * | |---------------------| |
+ * | |      IBNBD MSG      | |
+ * | |---------------------| |
+ * | |      IBTRS MSG      | |
+ * | +---------------------+ |
+ * +-------------------------+
+ */
+
+#define IBTRS_MSG_RESV_LEN 128
+/**
+ * struct ibtrs_msg_sess_open_resp - Server's response to %IBTRS_MSG_SESS_OPEN
+ * @hdr:	message header
+ * @ver:	IBTRS protocol version
+ * @cnt:	Number of rdma addresses (entries of @addr) in this message
+ * @rkey:	remote key to allow client to access buffers
+ * @hostname:   hostname of local host
+ * @reserved:   reserved fields for future usage (IBTRS_MSG_RESV_LEN bytes)
+ * @max_inflight_msg:  max inflight messages (queue-depth) in this session
+ * @max_io_size:   max io size server supports
+ * @max_req_size:  max InfiniBand message size server supports
+ * @addr:	rdma addresses of buffers
+ *
+ * DO NOT CHANGE members before ver.
+ */
+struct ibtrs_msg_sess_open_resp {
+	struct ibtrs_msg_hdr	hdr;
+	u8			ver;
+	u8			__padding1;
+	u16			cnt;
+	u32			rkey;
+	u8                      hostname[MAXHOSTNAMELEN];
+	u8			reserved[IBTRS_MSG_RESV_LEN];
+	u16			max_inflight_msg; /* NOTE(review): pad hole after? */
+	u32			max_io_size;
+	u32			max_req_size;
+	u64			addr[];
+};
+
+#define IBTRS_MSG_SESS_OPEN_RESP_LEN(cnt) /* header + cnt addr[] entries */ \
+	(sizeof(struct ibtrs_msg_sess_open_resp) + sizeof(u64) * (cnt))
+/**
+ * struct ibtrs_msg_con_open - Opens a new connection between client and server
+ * @hdr:		message header
+ * @uuid:		client host identifier (IBTRS_UUID_SIZE bytes), unique until module reload
+ */
+struct ibtrs_msg_con_open {
+	struct ibtrs_msg_hdr	hdr;
+	u8			uuid[IBTRS_UUID_SIZE];
+};
+
+/**
+ * struct ibtrs_msg_user - Data exchanged as an InfiniBand message
+ * @hdr:		message header
+ * @payl:		Payload from the user module (flexible array)
+ */
+struct ibtrs_msg_user {
+	struct ibtrs_msg_hdr	hdr;
+	u8			payl[];
+};
+
+/**
+ * struct ibtrs_sg_desc - RDMA buffer entry of ibtrs_msg_req_rdma_write.desc[]
+ * @addr:	Address of RDMA destination buffer
+ * @key:	Authorization rkey to write to the buffer
+ * @len:	Size of the buffer
+ */
+struct ibtrs_sg_desc {
+	u64			addr;
+	u32			key;
+	u32			len;
+};
+
+#define IBTRS_SG_DESC_LEN sizeof(struct ibtrs_sg_desc)
+
+/**
+ * struct ibtrs_msg_req_rdma_write - RDMA data transfer request from client
+ * @hdr:		message header
+ * @sg_cnt:		number of @desc entries
+ * @desc:		RDMA buffers where the server can write the result to
+ */
+struct ibtrs_msg_req_rdma_write {
+	struct ibtrs_msg_hdr	hdr;
+	u32			__padding;	/* hdr + pad + sg_cnt = 16 bytes */
+	u32			sg_cnt;
+	struct ibtrs_sg_desc    desc[];
+};
+
+/**
+ * struct ibtrs_msg_rdma_write - Message transferred to server with RDMA-Write
+ * @hdr:		message header
+ */
+struct ibtrs_msg_rdma_write {
+	struct ibtrs_msg_hdr	hdr;
+};
+
+/**
+ * struct ibtrs_msg_error - Error message
+ * @hdr:		message header
+ * @errno:		Errno number describing the error
+ */
+struct ibtrs_msg_error {
+	struct ibtrs_msg_hdr	hdr;
+	s32			errno;
+	u32			__padding;	/* explicit pad: 16 bytes total */
+};
+
+#if GCC_DIAGNOSTIC_AWARE
+#pragma GCC diagnostic pop
+#endif
+
+int ibtrs_validate_message(u16 queue_depth, const void *hdr);
+
+void fill_ibtrs_msg_sess_open(struct ibtrs_msg_sess_open *msg, u8 con_cnt,
+			      const uuid_le *uuid);
+
+void fill_ibtrs_msg_con_open(struct ibtrs_msg_con_open *msg,
+			     const uuid_le *uuid);
+
+void fill_ibtrs_msg_sess_info(struct ibtrs_msg_sess_info *msg,
+			      const char *hostname);
+
+void ibtrs_heartbeat_set_send_ts(struct ibtrs_heartbeat *h);
+void ibtrs_set_last_heartbeat(struct ibtrs_heartbeat *h);
+u64 ibtrs_last_heartbeat_diff_ms(const struct ibtrs_heartbeat *h);
+u64 ibtrs_heartbeat_send_ts_diff_ms(const struct ibtrs_heartbeat *h);
+
+void ibtrs_set_heartbeat_timeout(struct ibtrs_heartbeat *h, u32 timeout_ms);
+
+void ibtrs_heartbeat_warn(const struct ibtrs_heartbeat *h);
+
+bool ibtrs_heartbeat_timeout_is_expired(const struct ibtrs_heartbeat *h);
+
+u32 ibtrs_heartbeat_get_send_delay(const struct ibtrs_heartbeat *h);
+u32 ibtrs_heartbeat_get_check_delay(const struct ibtrs_heartbeat *h);
+void ibtrs_iu_put(struct list_head *iu_list, struct ibtrs_iu *iu);
+struct ibtrs_iu *ibtrs_iu_get(struct list_head *iu_list);
+
+struct ibtrs_iu *ibtrs_iu_alloc(u32 tag, size_t size, gfp_t t,
+				struct ib_device *dev,
+				enum dma_data_direction dir, bool is_msg);
+
+void ibtrs_iu_free(struct ibtrs_iu *iu, enum dma_data_direction dir,
+		   struct ib_device *dev);
+
+int ibtrs_write_empty_imm(struct ib_qp *qp, u32 imm_data,
+			  enum ib_send_flags flags);
+
+int ibtrs_post_send(struct ib_qp *qp, struct ib_mr *mr, struct ibtrs_iu *iu,
+		    u32 size);
+
+int ib_post_rdma_write_imm(struct ib_qp *qp, struct ib_sge *sge,
+			   unsigned int num_sge, u32 rkey, u64 rdma_addr,
+			   u64 wr_id, u32 imm_data, enum ib_send_flags flags);
+
+int ib_post_rdma_write(struct ib_qp *qp, struct ib_sge *sge,
+		       unsigned int num_sge, u32 rkey, u64 rdma_addr,
+		       u64 wr_id);
+int post_beacon(struct ib_con *con);
+/**
+ * ib_session_init() - Create a new IB session
+ */
+int ib_session_init(struct ib_device *dev, struct ib_session *session);
+
+/**
+ * ib_con_init() - initialize and add a ib_con to the session
+ * @con:	&ib_con to initialize
+ * @session:	session the &ib_con is added to
+ * @ctx:	CQ context, returned to the user via completion handler
+ *
+ * Returns 0 on success otherwise a negative errno code
+ */
+int ib_con_init(struct ib_con *con, struct rdma_cm_id *cm_id,
+		u32 max_send_sge,
+		ib_comp_handler comp_handler, void *ctx, int cq_vector,
+		u16 cq_size, u16 wr_queue_size, struct ib_session *session);
+
+int ibtrs_request_cq_notifications(struct ib_con *con);
+
+void ib_con_destroy(struct ib_con *con);
+
+/**
+ * ib_session_destroy() - Free a session
+ * The corresponding &ib_con must have been freed before.
+ */
+void ib_session_destroy(struct ib_session *session);
+
+int ib_get_max_wr_queue_size(struct ib_device *dev);
+
+int ibtrs_addr_to_str(const struct sockaddr_storage *addr, char *buf,
+		      size_t len);
+
+int ibtrs_heartbeat_timeout_validate(int timeout);
+
+/**
+ * kvec_length() - Sum of the iov_len fields of the first @nr kvec entries.
+ */
+static inline size_t kvec_length(const struct kvec *vec, size_t nr)
+{
+	size_t total = 0;
+
+	while (nr--)
+		total += (vec++)->iov_len;
+	return total;
+}
+
+/**
+ * copy_from_kvec() - Gather the first @copy bytes of a kvec array into @data.
+ * Segments are consumed in order until @copy bytes have been copied.
+ */
+static inline void copy_from_kvec(void *data, const struct kvec *vec,
+				  size_t copy)
+{
+	size_t chunk;
+
+	for (; copy; vec++, copy -= chunk) {
+		chunk = min(vec->iov_len, copy);
+		memcpy(data, vec->iov_base, chunk);
+		data += chunk;
+	}
+}
+
+static inline u64 timespec_to_ms(const struct timespec *ts)
+{
+	/* div_s64(): a plain 64-bit '/' may fail to link on 32-bit kernels */
+	return div_s64(timespec_to_ns(ts), NSEC_PER_MSEC);
+}
+u64 timediff_cur_ms(u64 cur_ms);
+
+void *ibtrs_malloc(size_t size);
+void *ibtrs_zalloc(size_t size);
+
+#define STAT_STORE_FUNC(store, reset) \
+static ssize_t store##_store(struct kobject *kobj, \
+			    struct kobj_attribute *attr, \
+			    const char *buf, size_t count) \
+{ \
+	struct ibtrs_session *sess = container_of(kobj, struct ibtrs_session, \
+						  kobj_stats); \
+	bool enable = sysfs_streq(buf, "1"); \
+	int err; \
+\
+	/* Only "1" (true) and "0" (false) are accepted, else -EINVAL. */ \
+	if (!enable && !sysfs_streq(buf, "0")) \
+		return -EINVAL; \
+	err = reset(sess, enable); \
+	if (err) \
+		return err; \
+	return count; \
+}
+
+#define STAT_SHOW_FUNC(show, print) \
+static ssize_t show##_show(struct kobject *kobj, \
+			   struct kobj_attribute *attr, \
+			   char *page) \
+{ \
+	struct ibtrs_session *sess; \
+\
+	sess = container_of(kobj, struct ibtrs_session, kobj_stats); \
+	return print(sess, page, PAGE_SIZE); \
+}
+
+#define STAT_ATTR(stat, print, reset) \
+/* Emits <stat>_store(), <stat>_show() and the mode-0644 <stat>_attr. */ \
+STAT_STORE_FUNC(stat, reset) \
+STAT_SHOW_FUNC(stat, print) \
+static struct kobj_attribute stat##_attr = \
+		/* same as __ATTR(stat, 0644, stat##_show, stat##_store) */ \
+		__ATTR_RW(stat)
+
+#endif /*__IBTRS_H*/
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux