From: Maxim Chicherin <maximc@xxxxxxxxxxxx> The CMID class represents rdma_cm_id, RDMA Connection Manager. Currently only synchronous data path is supported. Support was added to Context and PD classes to allow creation using rdmacm's API. Signed-off-by: Maxim Chicherin <maximc@xxxxxxxxxxxx> --- buildlib/pyverbs_functions.cmake | 2 +- pyverbs/CMakeLists.txt | 2 + pyverbs/cm_enums.pyx | 1 + pyverbs/cmid.pxd | 25 +++ pyverbs/cmid.pyx | 285 +++++++++++++++++++++++++++++++ pyverbs/device.pyx | 15 +- pyverbs/librdmacm.pxd | 106 ++++++++++++ pyverbs/librdmacm_enums.pxd | 32 ++++ pyverbs/pd.pyx | 24 ++- 9 files changed, 483 insertions(+), 9 deletions(-) create mode 120000 pyverbs/cm_enums.pyx create mode 100755 pyverbs/cmid.pxd create mode 100755 pyverbs/cmid.pyx create mode 100755 pyverbs/librdmacm.pxd create mode 100755 pyverbs/librdmacm_enums.pxd mode change 100644 => 100755 pyverbs/pd.pyx diff --git a/buildlib/pyverbs_functions.cmake b/buildlib/pyverbs_functions.cmake index 4c255054fe94..a494ec16610b 100644 --- a/buildlib/pyverbs_functions.cmake +++ b/buildlib/pyverbs_functions.cmake @@ -25,7 +25,7 @@ function(rdma_cython_module PY_MODULE LINKER_FLAGS) COMPILE_FLAGS "${CMAKE_C_FLAGS} -fPIC -fno-strict-aliasing -Wno-unused-function -Wno-redundant-decls -Wno-shadow -Wno-cast-function-type -Wno-implicit-fallthrough -Wno-unknown-warning -Wno-unknown-warning-option ${NO_VAR_TRACKING_FLAGS}" LIBRARY_OUTPUT_DIRECTORY "${BUILD_PYTHON}/${PY_MODULE}" PREFIX "") - target_link_libraries(${SONAME} LINK_PRIVATE ${PYTHON_LIBRARIES} ibverbs ${LINKER_FLAGS}) + target_link_libraries(${SONAME} LINK_PRIVATE ${PYTHON_LIBRARIES} ibverbs rdmacm ${LINKER_FLAGS}) install(TARGETS ${SONAME} DESTINATION ${CMAKE_INSTALL_PYTHON_ARCH_LIB}/${PY_MODULE}) endforeach() diff --git a/pyverbs/CMakeLists.txt b/pyverbs/CMakeLists.txt index 7bbb5fc841c0..de37025ce324 100755 --- a/pyverbs/CMakeLists.txt +++ b/pyverbs/CMakeLists.txt @@ -4,6 +4,8 @@ rdma_cython_module(pyverbs "" addr.pyx base.pyx + cm_enums.pyx 
+ cmid.pyx cq.pyx device.pyx enums.pyx diff --git a/pyverbs/cm_enums.pyx b/pyverbs/cm_enums.pyx new file mode 120000 index 000000000000..bdab2b585a1d --- /dev/null +++ b/pyverbs/cm_enums.pyx @@ -0,0 +1 @@ +librdmacm_enums.pxd \ No newline at end of file diff --git a/pyverbs/cmid.pxd b/pyverbs/cmid.pxd new file mode 100755 index 000000000000..56bc755daf42 --- /dev/null +++ b/pyverbs/cmid.pxd @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) +# Copyright (c) 2018, Mellanox Technologies. All rights reserved. See COPYING file + +#cython: language_level=3 + +from pyverbs.base cimport PyverbsObject, PyverbsCM +from libc.string cimport memcpy, memset +from libc.stdlib cimport free, malloc +cimport pyverbs.librdmacm as cm + + +cdef class CMID(PyverbsCM): + cdef cm.rdma_cm_id *id + cdef object ctx + cdef object pd + cpdef close(self) + + +cdef class AddrInfo(PyverbsObject): + cdef cm.rdma_addrinfo *addr_info + cpdef close(self) + + +cdef class ConnParam(PyverbsObject): + cdef cm.rdma_conn_param conn_param \ No newline at end of file diff --git a/pyverbs/cmid.pyx b/pyverbs/cmid.pyx new file mode 100755 index 000000000000..c752feda8781 --- /dev/null +++ b/pyverbs/cmid.pyx @@ -0,0 +1,285 @@ +from pyverbs.pyverbs_error import PyverbsError +from pyverbs.device cimport PortAttr, Context +from pyverbs.qp cimport QPInitAttr, QPAttr +from pyverbs.base import PyverbsRDMAErrno +cimport pyverbs.libibverbs_enums as e +cimport pyverbs.librdmacm_enums as ce +cimport pyverbs.libibverbs as v +cimport pyverbs.librdmacm as cm +from pyverbs.pd cimport PD +from pyverbs.mr cimport MR +from pyverbs.cq cimport WC + + +cdef class ConnParam(PyverbsObject): + + def __cinit__(self, resources=1, depth=1, flow_control=0, retry=5, + rnr_retry=5, srq=0, qp_num=0): + """ + Initialize a ConnParam object over an underlying rdma_conn_param + C object which contains connection parameters. 
There are a few types of + port spaces in RDMACM: RDMA_PS_TCP, RDMA_PS_UDP, RDMA_PS_IB and + RDMA_PS_IPOIB. RDMA_PS_TCP resembles RC QP connection, which provides + reliable, connection-oriented QP communication. This object applies only + to RDMA_PS_TCP port space. + :param resources: Max outstanding RDMA read and atomic ops that local + side will accept from the remote side. + :param depth: Max outstanding RDMA read and atomic ops that local side + will have to the remote side. + :param flow_control: Specifies if hardware flow control is available. + :param retry: Max number of times that a send, RDMA or atomic op from + the remote peer should be retried. + :param rnr_retry: The maximum number of times that a send operation from + the remote peer should be retried on a connection + after receiving a receiver not ready (RNR) error. + :param srq: Specifies if the QP using shared receive queue, ignored if + the QP created by CMID. + :param qp_num: Specifies the QP number, ignored if the QP created by + CMID. 
+ :return: ConnParam object + """ + memset(&self.conn_param, 0, sizeof(cm.rdma_conn_param)) + self.conn_param.responder_resources = resources + self.conn_param.initiator_depth = depth + self.conn_param.flow_control = flow_control + self.conn_param.retry_count = retry + self.conn_param.rnr_retry_count = rnr_retry + self.conn_param.srq = srq + self.conn_param.qp_num = qp_num + + def __str__(self): + print_format = '{:<4}: {:<4}\n' + return '{}: {}\n'.format('Connection parameters', "") +\ + print_format.format('responder resources', self.conn_param.responder_resources) +\ + print_format.format('initiator depth', self.conn_param.initiator_depth) +\ + print_format.format('flow control', self.conn_param.flow_control) +\ + print_format.format('retry count', self.conn_param.retry_count) +\ + print_format.format('rnr retry count', self.conn_param.rnr_retry_count) +\ + print_format.format('srq', self.conn_param.srq) +\ + print_format.format('qp number', self.conn_param.qp_num) + + +cdef class AddrInfo(PyverbsObject): + def __cinit__(self, node=None, service=None, port_space=0, flags=0): + """ + Initialize an AddrInfo object over an underlying rdma_addrinfo C object. + :param node: Name, dotted-decimal IPv4 or IPv6 hex address to resolve. + :param service: The service name or port number of the address. + :param port_space: RDMA port space used (RDMA_PS_UDP or RDMA_PS_TCP). + :param flags: Hint flags which control the operation. + :return: An AddrInfo object which contains information needed to + establish communication. 
+ """ + cdef char* srvc = NULL + cdef char* address = NULL + cdef cm.rdma_addrinfo hints + cdef cm.rdma_addrinfo *hints_ptr = NULL + + if node is not None: + node = node.encode('utf-8') + address = <char*>node + if service is not None: + service = service.encode('utf-8') + srvc = <char*>service + if port_space != 0: + hints_ptr = &hints + memset(hints_ptr, 0, sizeof(cm.rdma_addrinfo)) + hints.ai_port_space = port_space + hints.ai_flags = flags + ret = cm.rdma_getaddrinfo(address, srvc, hints_ptr, &self.addr_info) + if ret != 0: + raise PyverbsRDMAErrno('Failed to get Address Info') + + def __dealloc__(self): + self.close() + + cpdef close(self): + self.logger.debug('Closing AddrInfo') + if self.addr_info != NULL: + cm.rdma_freeaddrinfo(self.addr_info) + self.addr_info = NULL + + +cdef class CMID(PyverbsCM): + + def __cinit__(self, object creator=None, QPInitAttr qp_init_attr=None, + PD pd=None): + """ + Initialize a CMID object over an underlying rdma_cm_id C object. + This is the main RDMA CM object which provides most of the rdmacm API. + Currently only synchronous RDMA_PS_TCP communication supported. + :param creator: For synchronous communication we need AddrInfo object in + order to establish connection. We allow creator to be + None for inner usage, see get_request method. + :param pd: Optional parameter, a PD to be associated with this CMID. + :param qp_init_attr: Optional initial QP attributes of CMID + associated QP. + :return: CMID object for synchronous communication. 
+ """ + cdef v.ibv_qp_init_attr *init + cdef v.ibv_pd *in_pd = NULL + self.pd = None + self.ctx = None + if creator is None: + return + elif issubclass(type(creator), AddrInfo): + init = NULL if qp_init_attr is None else &qp_init_attr.attr + if pd is not None: + in_pd = pd.pd + self.pd = pd + ret = cm.rdma_create_ep(&self.id, (<AddrInfo>creator).addr_info, + in_pd, init) + if ret != 0: + raise PyverbsRDMAErrno('Failed to create CM ID') + if not (<AddrInfo>creator).addr_info.ai_flags & ce.RAI_PASSIVE: + self.ctx = Context(cmid=self) + if self.pd is None: + self.pd = PD(self) + else: + raise PyverbsRDMAErrno('Cannot create CM ID from {obj}' + .format(obj=type(creator))) + + def __dealloc__(self): + self.close() + + cpdef close(self): + self.logger.debug('Closing CMID') + if self.id != NULL: + cm.rdma_destroy_ep(self.id) + if self.ctx: + (<Context>self.ctx).context = NULL + if self.pd: + (<PD>self.pd).pd = NULL + self.id = NULL + + def get_request(self): + """ + Retrieves the next pending connection request event. The call may only + be used on listening CMIDs operating synchronously. If the call is + successful, a new CMID representing the connection request will be + returned to the user. The new CMID will reference event information + associated with the request until the user calls reject, accept, or + close on the newly created identifier. + :return: New CMID representing the connection request. + """ + to_conn = CMID() + ret = cm.rdma_get_request(self.id, &to_conn.id) + if ret != 0: + raise PyverbsRDMAErrno('Failed to get request, no connection established') + self.ctx = Context(cmid=to_conn) + self.pd = PD(to_conn) + return to_conn + + def reg_msgs(self, size): + """ + Registers a memory region for sending or receiving messages or for + RDMA operations. The registered memory may then be posted to an CMID + using post_send or post_recv methods. 
+ :param size: The total length of the memory to register + :return: registered MR + """ + return MR(self.pd, size, e.IBV_ACCESS_LOCAL_WRITE) + + def listen(self, backlog=0): + """ + Listen for incoming connection requests or datagram service lookup. + The listen is restricted to the locally bound source address. + :param backlog: The backlog of incoming connection requests + :return: None + """ + ret = cm.rdma_listen(self.id, backlog) + if ret != 0: + raise PyverbsRDMAErrno('Listen Failed') + + def connect(self, ConnParam param=None): + """ + Initiates an active connection request to a remote destination. + :param param: Optional connection parameters + :return: None + """ + cdef cm.rdma_conn_param *conn = &param.conn_param if param else NULL + ret = cm.rdma_connect(self.id, conn) + if ret != 0: + raise PyverbsRDMAErrno('Failed to Connect') + + def disconnect(self): + """ + Disconnects a connection and transitions any associated QP to error + state. + :return: None + """ + ret = cm.rdma_disconnect(self.id) + if ret != 0: + raise PyverbsRDMAErrno('Failed to Disconnect') + + def accept(self, ConnParam param=None): + """ + Is called from the listening side to accept a connection or datagram + service lookup request. + :param param: Optional connection parameters + :return: None + """ + cdef cm.rdma_conn_param *conn = &param.conn_param if param else NULL + ret = cm.rdma_accept(self.id, conn) + if ret != 0: + raise PyverbsRDMAErrno('Failed to Accept Connection') + + def post_recv(self, MR mr not None): + """ + Posts a recv_wr via QP associated with CMID. + Context param of rdma_post_recv C function currently not supported. + :param mr: A valid MR object. + :return: None + """ + ret = cm.rdma_post_recv(self.id, NULL, mr.buf, mr.mr.length, mr.mr) + if ret != 0: + raise PyverbsRDMAErrno('Failed to Post Receive') + + def post_send(self, MR mr not None, flags=v.IBV_SEND_SIGNALED): + """ + Posts a message via QP associated with CMID. 
+ Context param of rdma_post_send C function currently not supported. + :param mr: A valid MR object which contains message to send. + :param flags: flags for send work request. + :return: None + """ + ret = cm.rdma_post_send(self.id, NULL, mr.buf, mr.mr.length, mr.mr, + flags) + if ret != 0: + raise PyverbsRDMAErrno('Failed to Post Send') + + def get_recv_comp(self): + """ + Polls the receive CQ associated with CMID for a work completion. + :return: The retrieved WC or None if there is no completions + """ + cdef v.ibv_wc wc + ret = cm.rdma_get_recv_comp(self.id, &wc) + if ret < 0: + raise PyverbsRDMAErrno('Failed to retrieve receive completion') + elif ret == 0: + return None + return WC(wr_id=wc.wr_id, status=wc.status, opcode=wc.opcode, + vendor_err=wc.vendor_err, byte_len=wc.byte_len, + qp_num=wc.qp_num, src_qp=wc.src_qp, + imm_data=wc.imm_data, wc_flags=wc.wc_flags, + pkey_index=wc.pkey_index, slid=wc.slid, sl=wc.sl, + dlid_path_bits=wc.dlid_path_bits) + + def get_send_comp(self): + """ + Polls the send CQ associated with CMID for a work completion. 
+ :return: The retrieved WC or None if there is no completions + """ + cdef v.ibv_wc wc + ret = cm.rdma_get_send_comp(self.id, &wc) + if ret < 0: + raise PyverbsRDMAErrno('Failed to retrieve send completion') + elif ret == 0: + return None + return WC(wr_id=wc.wr_id, status=wc.status, opcode=wc.opcode, + vendor_err=wc.vendor_err, byte_len=wc.byte_len, + qp_num=wc.qp_num, src_qp=wc.src_qp, + imm_data=wc.imm_data, wc_flags=wc.wc_flags, + pkey_index=wc.pkey_index, slid=wc.slid, sl=wc.sl, + dlid_path_bits=wc.dlid_path_bits) diff --git a/pyverbs/device.pyx b/pyverbs/device.pyx index 58a2aca27fcc..c747822b3b32 100755 --- a/pyverbs/device.pyx +++ b/pyverbs/device.pyx @@ -14,6 +14,7 @@ from .pyverbs_error import PyverbsUserError from pyverbs.base import PyverbsRDMAErrno cimport pyverbs.libibverbs_enums as e cimport pyverbs.libibverbs as v +from pyverbs.cmid cimport CMID from pyverbs.xrcd cimport XRCD from pyverbs.addr cimport GID from pyverbs.mr import DMMR @@ -79,16 +80,22 @@ cdef class Context(PyverbsCM): Initializes a Context object. The function searches the IB devices list for a device with the name provided by the user. If such a device is found, it is opened (unless provider attributes were given). + In case of cmid argument, CMID object already holds an ibv_context + initiated pointer, hence all we have to do is assign this pointer to + Context's object pointer. 
:param kwargs: Arguments: * *name* (str) The RDMA device's name * *attr* (object) Device-specific attributes, meaning that the device is to be opened by the provider + * *cmid* (CMID) + A CMID object (represents rdma_cm_id struct) :return: None """ cdef int count cdef v.ibv_device **dev_list + cdef CMID cmid self.pds = weakref.WeakSet() self.dms = weakref.WeakSet() @@ -99,7 +106,13 @@ cdef class Context(PyverbsCM): dev_name = kwargs.get('name') provider_attr = kwargs.get('attr') - if dev_name is not None: + cmid = kwargs.get('cmid') + + if cmid is not None: + self.context = cmid.id.verbs + cmid.ctx = self + return + elif dev_name is not None: self.name = dev_name else: raise PyverbsUserError('Device name must be provided') diff --git a/pyverbs/librdmacm.pxd b/pyverbs/librdmacm.pxd new file mode 100755 index 000000000000..935a4ae24e87 --- /dev/null +++ b/pyverbs/librdmacm.pxd @@ -0,0 +1,106 @@ +include 'libibverbs.pxd' +include 'librdmacm_enums.pxd' +from libc.stdint cimport uint8_t, uint32_t + +cdef extern from '<rdma/rdma_cma.h>': + + cdef struct rdma_cm_id: + ibv_context *verbs + rdma_event_channel *channel + void *context + ibv_qp *qp + rdma_port_space ps + uint8_t port_num + rdma_cm_event *event + ibv_comp_channel *send_cq_channel + ibv_cq *send_cq + ibv_comp_channel *recv_cq_channel + ibv_cq *recv_cq + ibv_srq *srq + ibv_pd *pd + ibv_qp_type qp_type + + cdef struct rdma_event_channel: + int fd + + cdef struct rdma_conn_param: + const void *private_data + uint8_t private_data_len + uint8_t responder_resources + uint8_t initiator_depth + uint8_t flow_control + uint8_t retry_count + uint8_t rnr_retry_count + uint8_t srq + uint32_t qp_num + + cdef struct rdma_ud_param: + const void *private_data + uint8_t private_data_len + ibv_ah_attr ah_attr + uint32_t qp_num + uint32_t qkey + + cdef union param: + rdma_conn_param conn + rdma_ud_param ud + + cdef struct rdma_cm_event: + rdma_cm_id *id + rdma_cm_id *listen_id + rdma_cm_event_type event + int status + param param 
+ + cdef struct rdma_addrinfo: + int ai_flags + int ai_family + int ai_qp_type + int ai_port_space + int ai_src_len + int ai_dst_len + sockaddr *ai_src_addr + sockaddr *ai_dst_addr + char *ai_src_canonname + char *ai_dst_canonname + size_t ai_route_len + void *ai_route + size_t ai_connect_len + void *ai_connect + rdma_addrinfo *ai_next + +# These non rdmacm structs defined in one of rdma_cma.h's included header files + cdef struct sockaddr: + unsigned short sa_family + char sa_data[14] + + cdef struct in_addr: + uint32_t s_addr + + cdef struct sockaddr_in: + short sin_family + unsigned short sin_port + in_addr sin_addr + char sin_zero[8] + + int rdma_create_ep(rdma_cm_id **id, rdma_addrinfo *res, + ibv_pd *pd, ibv_qp_init_attr *qp_init_attr) + void rdma_destroy_ep(rdma_cm_id *id) + int rdma_get_request(rdma_cm_id *listen, rdma_cm_id **id) + int rdma_connect(rdma_cm_id *id, rdma_conn_param *conn_param) + int rdma_disconnect(rdma_cm_id *id) + int rdma_listen(rdma_cm_id *id, int backlog) + int rdma_accept(rdma_cm_id *id, rdma_conn_param *conn_param) + int rdma_getaddrinfo(char *node, char *service, rdma_addrinfo *hints, + rdma_addrinfo **res) + void rdma_freeaddrinfo(rdma_addrinfo *res) + +cdef extern from '<rdma/rdma_verbs.h>': + int rdma_post_recv(rdma_cm_id *id, void *context, void *addr, + size_t length, ibv_mr *mr) + int rdma_post_send(rdma_cm_id *id, void *context, void *addr, + size_t length, ibv_mr *mr, int flags) + int rdma_get_send_comp(rdma_cm_id *id, ibv_wc *wc) + int rdma_get_recv_comp(rdma_cm_id *id, ibv_wc *wc) + ibv_mr *rdma_reg_msgs(rdma_cm_id *id, void *addr, size_t length) + int rdma_dereg_mr(ibv_mr *mr) diff --git a/pyverbs/librdmacm_enums.pxd b/pyverbs/librdmacm_enums.pxd new file mode 100755 index 000000000000..22a3648fb4a6 --- /dev/null +++ b/pyverbs/librdmacm_enums.pxd @@ -0,0 +1,32 @@ +cdef extern from '<rdma/rdma_cma.h>': + + cpdef enum rdma_cm_event_type: + RDMA_CM_EVENT_ADDR_RESOLVED + RDMA_CM_EVENT_ADDR_ERROR + RDMA_CM_EVENT_ROUTE_RESOLVED 
+ RDMA_CM_EVENT_ROUTE_ERROR + RDMA_CM_EVENT_CONNECT_REQUEST + RDMA_CM_EVENT_CONNECT_RESPONSE + RDMA_CM_EVENT_CONNECT_ERROR + RDMA_CM_EVENT_UNREACHABLE + RDMA_CM_EVENT_REJECTED + RDMA_CM_EVENT_ESTABLISHED + RDMA_CM_EVENT_DISCONNECTED + RDMA_CM_EVENT_DEVICE_REMOVAL + RDMA_CM_EVENT_MULTICAST_JOIN + RDMA_CM_EVENT_MULTICAST_ERROR + RDMA_CM_EVENT_ADDR_CHANGE + RDMA_CM_EVENT_TIMEWAIT_EXIT + + cpdef enum rdma_port_space: + RDMA_PS_IPOIB + RDMA_PS_TCP + RDMA_PS_UDP + RDMA_PS_IB + + # Hint flags which control the operation. + cpdef enum: + RAI_PASSIVE + RAI_NUMERICHOST + RAI_NOROUTE + RAI_FAMILY diff --git a/pyverbs/pd.pyx b/pyverbs/pd.pyx old mode 100644 new mode 100755 index 46cbb36009ce..d6af58f25980 --- a/pyverbs/pd.pyx +++ b/pyverbs/pd.pyx @@ -2,9 +2,10 @@ # Copyright (c) 2019, Mellanox Technologies. All rights reserved. import weakref -from pyverbs.pyverbs_error import PyverbsRDMAError, PyverbsError +from pyverbs.pyverbs_error import PyverbsUserError, PyverbsError from pyverbs.base import PyverbsRDMAErrno from pyverbs.device cimport Context, DM +from pyverbs.cmid cimport CMID from .mr cimport MR, MW, DMMR from pyverbs.srq cimport SRQ from pyverbs.addr cimport AH @@ -15,18 +16,27 @@ cdef extern from 'errno.h': cdef class PD(PyverbsCM): - def __cinit__(self, Context context not None): + def __cinit__(self, object creator not None): """ Initializes a PD object. A reference for the creating Context is kept so that Python's GC will destroy the objects in the right order. 
:param context: The Context object creating the PD :return: The newly created PD on success """ - self.pd = v.ibv_alloc_pd(<v.ibv_context*>context.context) - if self.pd == NULL: - raise PyverbsRDMAErrno('Failed to allocate PD', errno) - self.ctx = context - context.add_ref(self) + if issubclass(type(creator), Context): + self.pd = v.ibv_alloc_pd((<Context>creator).context) + if self.pd == NULL: + raise PyverbsRDMAErrno('Failed to allocate PD') + self.ctx = creator + elif issubclass(type(creator), CMID): + cmid = <CMID>creator + self.pd = cmid.id.pd + self.ctx = cmid.ctx + cmid.pd = self + else: + raise PyverbsUserError('Cannot create PD from {type}' + .format(type=type(creator))) + self.ctx.add_ref(self) self.logger.debug('PD: Allocated ibv_pd') self.srqs = weakref.WeakSet() self.mrs = weakref.WeakSet() -- 2.21.0