On 28-Feb-19 17:38, Bernard Metzler wrote: > Hi Gal, > > Many thanks for having a close look! > > -----"Gal Pressman" <galpress@xxxxxxxxxx> wrote: ----- > >> To: "Bernard Metzler" <bmt@xxxxxxxxxxxxxx>, >> <linux-rdma@xxxxxxxxxxxxxxx> >> From: "Gal Pressman" <galpress@xxxxxxxxxx> >> Date: 02/28/2019 03:22PM >> Subject: Re: [PATCH v5 06/13] SIW application interface >> >> On 19-Feb-19 12:08, Bernard Metzler wrote: >>> Signed-off-by: Bernard Metzler <bmt@xxxxxxxxxxxxxx> >>> --- >>> drivers/infiniband/sw/siw/siw_ae.c | 121 ++ >>> drivers/infiniband/sw/siw/siw_verbs.c | 1851 >> ++++++++++++++++++++++ >>> drivers/infiniband/sw/siw/siw_verbs.h | 114 ++ >>> include/uapi/rdma/rdma_user_ioctl_cmds.h | 1 + >>> include/uapi/rdma/siw_user.h | 223 +++ >>> 5 files changed, 2310 insertions(+) >>> create mode 100644 drivers/infiniband/sw/siw/siw_ae.c >>> create mode 100644 drivers/infiniband/sw/siw/siw_verbs.c >>> create mode 100644 drivers/infiniband/sw/siw/siw_verbs.h >>> create mode 100644 include/uapi/rdma/siw_user.h >>> >>> diff --git a/drivers/infiniband/sw/siw/siw_ae.c >> b/drivers/infiniband/sw/siw/siw_ae.c >>> new file mode 100644 >>> index 000000000000..10907a8138b8 >>> --- /dev/null >>> +++ b/drivers/infiniband/sw/siw/siw_ae.c >>> @@ -0,0 +1,121 @@ >>> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause >>> +/* >>> + * Software iWARP device driver >>> + * >>> + * Authors: Bernard Metzler <bmt@xxxxxxxxxxxxxx> >>> + * >>> + * Copyright (c) 2008-2018, IBM Corporation >>> + * >>> + * This software is available to you under a choice of one of two >>> + * licenses. You may choose to be licensed under the terms of the >> GNU >>> + * General Public License (GPL) Version 2, available from the file >>> + * COPYING in the main directory of this source tree, or the >>> + * BSD license below: >>> + * >>> + * Redistribution and use in source and binary forms, with or >>> + * without modification, are permitted provided that the >> following >>> + * conditions are met: >>> + * >>> + * - Redistributions of source code must retain the above >> copyright notice, >>> + * this list of conditions and the following disclaimer. >>> + * >>> + * - Redistributions in binary form must reproduce the above >> copyright >>> + * notice, this list of conditions and the following >> disclaimer in the >>> + * documentation and/or other materials provided with the >> distribution. >>> + * >>> + * - Neither the name of IBM nor the names of its contributors >> may be >>> + * used to endorse or promote products derived from this >> software without >>> + * specific prior written permission. >>> + * >>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >>> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES >> OF >>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND >>> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT >> HOLDERS >>> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN >> AN >>> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR >> IN >>> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >> THE >>> + * SOFTWARE. 
>>> + */ >>> + >>> +#include <linux/errno.h> >>> +#include <linux/types.h> >>> +#include <linux/net.h> >>> +#include <linux/scatterlist.h> >>> +#include <linux/highmem.h> >>> +#include <net/sock.h> >>> +#include <net/tcp_states.h> >>> +#include <net/tcp.h> >>> + >>> +#include <rdma/iw_cm.h> >>> +#include <rdma/ib_verbs.h> >>> +#include <rdma/ib_smi.h> >>> +#include <rdma/ib_user_verbs.h> >>> + >>> +#include "siw.h" >>> +#include "siw_obj.h" >>> +#include "siw_cm.h" >>> + >>> +void siw_qp_event(struct siw_qp *qp, enum ib_event_type etype) >>> +{ >>> + struct ib_event event; >>> + struct ib_qp *base_qp = &qp->base_qp; >>> + >>> + /* >>> + * Do not report asynchronous errors on QP which gets >>> + * destroyed via verbs interface (siw_destroy_qp()) >>> + */ >>> + if (qp->attrs.flags & SIW_QP_IN_DESTROY) >>> + return; >>> + >>> + event.event = etype; >>> + event.device = base_qp->device; >>> + event.element.qp = base_qp; >>> + >>> + if (base_qp->event_handler) { >>> + siw_dbg_qp(qp, "reporting event %d\n", etype); >>> + (*base_qp->event_handler)(&event, base_qp->qp_context); >>> + } >>> +} >>> + >>> +void siw_cq_event(struct siw_cq *cq, enum ib_event_type etype) >>> +{ >>> + struct ib_event event; >>> + struct ib_cq *base_cq = &cq->base_cq; >>> + >>> + event.event = etype; >>> + event.device = base_cq->device; >>> + event.element.cq = base_cq; >>> + >>> + if (base_cq->event_handler) { >>> + siw_dbg(cq->hdr.sdev, "reporting CQ event %d\n", etype); >>> + (*base_cq->event_handler)(&event, base_cq->cq_context); >>> + } >>> +} >>> + >>> +void siw_srq_event(struct siw_srq *srq, enum ib_event_type etype) >>> +{ >>> + struct ib_event event; >>> + struct ib_srq *base_srq = &srq->base_srq; >>> + >>> + event.event = etype; >>> + event.device = base_srq->device; >>> + event.element.srq = base_srq; >>> + >>> + if (base_srq->event_handler) { >>> + siw_dbg(srq->pd->hdr.sdev, "reporting SRQ event %d\n", etype); >>> + (*base_srq->event_handler)(&event, base_srq->srq_context); >>> + } >>> +} >>> + >>> +void siw_port_event(struct siw_device *sdev, u8 port, enum >> ib_event_type etype) >>> +{ >>> + struct ib_event event; >>> + >>> + event.event = etype; >>> + event.device = &sdev->base_dev; >>> + event.element.port_num = port; >>> + >>> + siw_dbg(sdev, "reporting port event %d\n", etype); >>> + >>> + ib_dispatch_event(&event); >>> +} >>> diff --git a/drivers/infiniband/sw/siw/siw_verbs.c >> b/drivers/infiniband/sw/siw/siw_verbs.c >>> new file mode 100644 >>> index 000000000000..4c1fbcf66b5c >>> --- /dev/null >>> +++ b/drivers/infiniband/sw/siw/siw_verbs.c >>> @@ -0,0 +1,1851 @@ >>> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause >>> +/* >>> + * Software iWARP device driver >>> + * >>> + * Authors: Bernard Metzler <bmt@xxxxxxxxxxxxxx> >>> + * >>> + * Copyright (c) 2008-2019, IBM Corporation >>> + * >>> + * This software is available to you under a choice of one of two >>> + * licenses. You may choose to be licensed under the terms of the >> GNU >>> + * General Public License (GPL) Version 2, available from the file >>> + * COPYING in the main directory of this source tree, or the >>> + * BSD license below: >>> + * >>> + * Redistribution and use in source and binary forms, with or >>> + * without modification, are permitted provided that the >> following >>> + * conditions are met: >>> + * >>> + * - Redistributions of source code must retain the above >> copyright notice, >>> + * this list of conditions and the following disclaimer. 
>>> + * >>> + * - Redistributions in binary form must reproduce the above >> copyright >>> + * notice, this list of conditions and the following >> disclaimer in the >>> + * documentation and/or other materials provided with the >> distribution. >>> + * >>> + * - Neither the name of IBM nor the names of its contributors >> may be >>> + * used to endorse or promote products derived from this >> software without >>> + * specific prior written permission. >>> + * >>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >>> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES >> OF >>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND >>> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT >> HOLDERS >>> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN >> AN >>> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR >> IN >>> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >> THE >>> + * SOFTWARE. >>> + */ >>> + >>> +#include <linux/errno.h> >>> +#include <linux/types.h> >>> +#include <linux/uaccess.h> >>> +#include <linux/vmalloc.h> >>> + >>> +#include <rdma/iw_cm.h> >>> +#include <rdma/ib_verbs.h> >>> +#include <rdma/ib_smi.h> >>> +#include <rdma/ib_user_verbs.h> >>> + >>> +#include "siw.h" >>> +#include "siw_verbs.h" >>> +#include "siw_obj.h" >>> +#include "siw_cm.h" >>> + >>> +static int ib_qp_state_to_siw_qp_state[IB_QPS_ERR+1] = { >>> + [IB_QPS_RESET] = SIW_QP_STATE_IDLE, >>> + [IB_QPS_INIT] = SIW_QP_STATE_IDLE, >>> + [IB_QPS_RTR] = SIW_QP_STATE_RTR, >>> + [IB_QPS_RTS] = SIW_QP_STATE_RTS, >>> + [IB_QPS_SQD] = SIW_QP_STATE_CLOSING, >>> + [IB_QPS_SQE] = SIW_QP_STATE_TERMINATE, >>> + [IB_QPS_ERR] = SIW_QP_STATE_ERROR >>> +}; >>> + >>> +static char ib_qp_state_to_string[IB_QPS_ERR+1][sizeof "RESET"] = >> { >> >> Parentheses after sizeof, please. Not sure if you saw this :), also please keep spaces around the '+' sign.
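I.e. something like this (just a sketch, not taken from the patch):

static int ib_qp_state_to_siw_qp_state[IB_QPS_ERR + 1] = { ... };
static char ib_qp_state_to_string[IB_QPS_ERR + 1][sizeof("RESET")] = { ... };

with the initializer lists left unchanged.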
>> >>> + [IB_QPS_RESET] = "RESET", >>> + [IB_QPS_INIT] = "INIT", >>> + [IB_QPS_RTR] = "RTR", >>> + [IB_QPS_RTS] = "RTS", >>> + [IB_QPS_SQD] = "SQD", >>> + [IB_QPS_SQE] = "SQE", >>> + [IB_QPS_ERR] = "ERR" >>> +}; >>> + >>> +static u32 siw_insert_uobj(struct siw_ucontext *uctx, void *vaddr, >> u32 size) >>> +{ >>> + struct siw_uobj *uobj; >>> + u32 key; >>> + >>> + uobj = kzalloc(sizeof(*uobj), GFP_KERNEL); >>> + if (!uobj) >>> + return SIW_INVAL_UOBJ_KEY; >>> + >>> + size = PAGE_ALIGN(size); >>> + >>> + spin_lock(&uctx->uobj_lock); >>> + >>> + if (list_empty(&uctx->uobj_list)) >>> + uctx->uobj_key = 0; >>> + >>> + key = uctx->uobj_key; >>> + if (key > SIW_MAX_UOBJ_KEY) { >>> + spin_unlock(&uctx->uobj_lock); >>> + kfree(uobj); >>> + return SIW_INVAL_UOBJ_KEY; >>> + } >>> + uobj->key = key; >>> + uobj->size = size; >>> + uobj->addr = vaddr; >>> + >>> + uctx->uobj_key += size; /* advance for next object */ >>> + >>> + list_add_tail(&uobj->list, &uctx->uobj_list); >>> + >>> + spin_unlock(&uctx->uobj_lock); >>> + >>> + return key; >>> +} >>> + >>> +static struct siw_uobj *siw_remove_uobj(struct siw_ucontext *uctx, >> u32 key, >>> + u32 size) >>> +{ >>> + struct list_head *pos, *nxt; >>> + >>> + spin_lock(&uctx->uobj_lock); >>> + >>> + list_for_each_safe(pos, nxt, &uctx->uobj_list) { >>> + struct siw_uobj *uobj = list_entry(pos, struct siw_uobj, list); >>> + >>> + if (uobj->key == key && uobj->size == size) { >>> + list_del(&uobj->list); >>> + spin_unlock(&uctx->uobj_lock); >>> + return uobj; >>> + } >>> + } >>> + spin_unlock(&uctx->uobj_lock); >>> + >>> + return NULL; >>> +} >>> + >>> +int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) >>> +{ >>> + struct siw_ucontext *uctx = to_siw_ctx(ctx); >>> + struct siw_uobj *uobj; >>> + u32 key = vma->vm_pgoff << PAGE_SHIFT; >>> + int size = vma->vm_end - vma->vm_start; >>> + int rv = -EINVAL; >>> + >>> + /* >>> + * Must be page aligned >>> + */ >>> + if (vma->vm_start & (PAGE_SIZE - 1)) { >>> + pr_warn("map not page aligned\n"); >>> + goto out; >>> + } >>> + >>> + uobj = siw_remove_uobj(uctx, key, size); >>> + if (!uobj) { >>> + pr_warn("mmap lookup failed: %u, %d\n", key, size); >>> + goto out; >>> + } >> >> EFA used to remove the objects from the list on mmap and we were >> asked by Jason >> to keep them in the list until dealloc_ucontext. This way multiple >> mmaps could >> work as well. > > siw user land library currently does not support multiple mappings of > the same object, will consider in the future. 
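In case it helps for later: the keep-until-dealloc variant could look roughly like the sketch below (siw_find_uobj() is a made-up name, and the entries would then only be freed when the ucontext is deallocated):

static struct siw_uobj *siw_find_uobj(struct siw_ucontext *uctx, u32 key, u32 size)
{
	struct siw_uobj *uobj;

	spin_lock(&uctx->uobj_lock);
	list_for_each_entry(uobj, &uctx->uobj_list, list) {
		if (uobj->key == key && uobj->size == size) {
			/* entry stays on the list, so the same key can be mmap()ed again */
			spin_unlock(&uctx->uobj_lock);
			return uobj;
		}
	}
	spin_unlock(&uctx->uobj_lock);

	return NULL;
}

siw_mmap() would then call this instead of siw_remove_uobj() and skip the kfree().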
> >> >>> + rv = remap_vmalloc_range(vma, uobj->addr, 0); >>> + if (rv) >>> + pr_warn("remap_vmalloc_range failed: %u, %d\n", key, size); >>> + >>> + kfree(uobj); >>> +out: >>> + return rv; >>> +} >>> + >>> +struct ib_ucontext *siw_alloc_ucontext(struct ib_device *base_dev, >>> + struct ib_udata *udata) >>> +{ >>> + struct siw_ucontext *ctx = NULL; >>> + struct siw_device *sdev = to_siw_dev(base_dev); >>> + int rv; >>> + >>> + if (atomic_inc_return(&sdev->num_ctx) > SIW_MAX_CONTEXT) { >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); >>> + if (!ctx) { >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + spin_lock_init(&ctx->uobj_lock); >>> + INIT_LIST_HEAD(&ctx->uobj_list); >>> + ctx->uobj_key = 0; >>> + >>> + ctx->sdev = sdev; >>> + if (udata) { >>> + struct siw_uresp_alloc_ctx uresp; >>> + >>> + memset(&uresp, 0, sizeof(uresp)); >>> + uresp.dev_id = sdev->vendor_part_id; >>> + >>> + rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); >> >> All ib_copy_from/to_udata should copy the minimum of sizeof() and >> udata->inlen/outlen. Applies to other places in the code as well. >> > Hmmm, shouldn't better the rdma core take care of that? Sounds like a good idea, I would love to see such patch :). Anyway, this should be fixed here or in the core code. > >>> + if (rv) >>> + goto err_out; >>> + } >>> + siw_dbg(sdev, "success. now %d context(s)\n", >>> + atomic_read(&sdev->num_ctx)); >>> + >>> + return &ctx->ib_ucontext; >>> + >>> +err_out: >>> + kfree(ctx); >>> + >>> + atomic_dec(&sdev->num_ctx); >>> + siw_dbg(sdev, "failure %d. now %d context(s)\n", >>> + rv, atomic_read(&sdev->num_ctx)); >>> + >>> + return ERR_PTR(rv); >>> +} >>> + >>> +int siw_dealloc_ucontext(struct ib_ucontext *base_ctx) >>> +{ >>> + struct siw_ucontext *ctx = to_siw_ctx(base_ctx); >>> + >>> + atomic_dec(&ctx->sdev->num_ctx); >>> + kfree(ctx); >>> + return 0; >>> +} >>> + >>> +int siw_query_device(struct ib_device *base_dev, struct >> ib_device_attr *attr, >>> + struct ib_udata *unused) >>> +{ >>> + struct siw_device *sdev = to_siw_dev(base_dev); >>> + /* >>> + * A process context is needed to report avail memory resources. 
>>> + */ >>> + if (in_interrupt()) >>> + return -EINVAL; >>> + >>> + memset(attr, 0, sizeof(*attr)); >>> + >>> + attr->max_mr_size = rlimit(RLIMIT_MEMLOCK); /* per process */ >>> + attr->vendor_id = SIW_VENDOR_ID; >>> + attr->vendor_part_id = sdev->vendor_part_id; >>> + attr->max_qp = sdev->attrs.max_qp; >>> + attr->max_qp_wr = sdev->attrs.max_qp_wr; >>> + >>> + attr->max_qp_rd_atom = sdev->attrs.max_ord; >>> + attr->max_qp_init_rd_atom = sdev->attrs.max_ird; >>> + attr->max_res_rd_atom = sdev->attrs.max_qp * sdev->attrs.max_ird; >>> + attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS >>> + | IB_DEVICE_ALLOW_USER_UNREG; >>> + attr->max_send_sge = sdev->attrs.max_sge; >>> + attr->max_recv_sge = sdev->attrs.max_sge; >>> + attr->max_sge_rd = sdev->attrs.max_sge_rd; >>> + attr->max_cq = sdev->attrs.max_cq; >>> + attr->max_cqe = sdev->attrs.max_cqe; >>> + attr->max_mr = sdev->attrs.max_mr; >>> + attr->max_pd = sdev->attrs.max_pd; >>> + attr->max_mw = sdev->attrs.max_mw; >>> + attr->max_fmr = sdev->attrs.max_fmr; >>> + attr->max_srq = sdev->attrs.max_srq; >>> + attr->max_srq_wr = sdev->attrs.max_srq_wr; >>> + attr->max_srq_sge = sdev->attrs.max_srq_sge; >>> + attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL; >>> + attr->page_size_cap = PAGE_SIZE; >>> + /* Revisit if RFC 7306 gets supported */ >>> + attr->atomic_cap = 0; >>> + >>> + memcpy(&attr->sys_image_guid, sdev->netdev->dev_addr, 6); >>> + >>> + return 0; >>> +} >>> + >>> +/* >>> + * Approximate translation of real MTU for IB. >>> + */ >>> +static inline enum ib_mtu siw_mtu_net2base(unsigned short mtu) >>> +{ >>> + if (mtu >= 4096) >>> + return IB_MTU_4096; >>> + if (mtu >= 2048) >>> + return IB_MTU_2048; >>> + if (mtu >= 1024) >>> + return IB_MTU_1024; >>> + if (mtu >= 512) >>> + return IB_MTU_512; >>> + if (mtu >= 256) >>> + return IB_MTU_256; >>> + return IB_MTU_4096; >>> +} >> >> ib_mtu_int_to_enum()? >> > > yes, thanks, I was not aware of it... >>> + >>> +int siw_query_port(struct ib_device *base_dev, u8 port, >>> + struct ib_port_attr *attr) >>> +{ >>> + struct siw_device *sdev = to_siw_dev(base_dev); >>> + >>> + memset(attr, 0, sizeof(*attr)); >>> + >>> + attr->state = sdev->state; >>> + attr->max_mtu = siw_mtu_net2base(sdev->netdev->mtu); >>> + attr->active_mtu = attr->max_mtu; >>> + attr->gid_tbl_len = 1; >>> + attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; >>> + attr->max_msg_sz = -1; >>> + attr->pkey_tbl_len = 1; >>> + attr->active_width = 2; >>> + attr->active_speed = 2; >>> + attr->phys_state = sdev->state == IB_PORT_ACTIVE ? 
5 : 3; >>> + /* >>> + * All zero >>> + * >>> + * attr->lid = 0; >>> + * attr->bad_pkey_cntr = 0; >>> + * attr->qkey_viol_cntr = 0; >>> + * attr->sm_lid = 0; >>> + * attr->lmc = 0; >>> + * attr->max_vl_num = 0; >>> + * attr->sm_sl = 0; >>> + * attr->subnet_timeout = 0; >>> + * attr->init_type_repy = 0; >>> + */ >>> + return 0; >>> +} >>> + >>> +int siw_get_port_immutable(struct ib_device *base_dev, u8 port, >>> + struct ib_port_immutable *port_immutable) >>> +{ >>> + struct ib_port_attr attr; >>> + int rv = siw_query_port(base_dev, port, &attr); >>> + >>> + if (rv) >>> + return rv; >>> + >>> + port_immutable->pkey_tbl_len = attr.pkey_tbl_len; >>> + port_immutable->gid_tbl_len = attr.gid_tbl_len; >>> + port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; >>> + >>> + return 0; >>> +} >>> + >>> +int siw_query_pkey(struct ib_device *base_dev, u8 port, u16 idx, >> u16 *pkey) >>> +{ >>> + /* Report the default pkey */ >>> + *pkey = 0xffff; >>> + return 0; >>> +} >>> + >>> +int siw_query_gid(struct ib_device *base_dev, u8 port, int idx, >>> + union ib_gid *gid) >>> +{ >>> + struct siw_device *sdev = to_siw_dev(base_dev); >>> + >>> + /* subnet_prefix == interface_id == 0; */ >>> + memset(gid, 0, sizeof(*gid)); >>> + memcpy(&gid->raw[0], sdev->netdev->dev_addr, 6); >>> + >>> + return 0; >>> +} >>> + >>> +int siw_alloc_pd(struct ib_pd *base_pd, struct ib_ucontext >> *context, >>> + struct ib_udata *udata) >>> +{ >>> + struct siw_pd *pd = to_siw_pd(base_pd); >>> + struct siw_device *sdev = to_siw_dev(base_pd->device); >>> + >>> + if (atomic_inc_return(&sdev->num_pd) > SIW_MAX_PD) >>> + goto err_out; >>> + >>> + if (siw_pd_add(sdev, pd)) >>> + goto err_out; >>> + >>> + siw_dbg(sdev, "success. now %d PD's(s)\n", >>> + atomic_read(&sdev->num_pd)); >>> + >>> + return 0; >>> + >>> +err_out: >>> + atomic_dec(&sdev->num_pd); >>> + siw_dbg(sdev, "failed. now %d PD's(s)\n", >>> + atomic_read(&sdev->num_pd)); >>> + >>> + return -ENOMEM; >>> +} >>> + >>> +void siw_dealloc_pd(struct ib_pd *base_pd) >>> +{ >>> + struct siw_pd *pd = to_siw_pd(base_pd); >>> + struct siw_device *sdev = to_siw_dev(base_pd->device); >>> + >>> + siw_remove_obj(&sdev->lock, &sdev->pd_idr, &pd->hdr); >>> + siw_pd_put(pd); >>> +} >>> + >>> +void siw_qp_get_ref(struct ib_qp *base_qp) >>> +{ >>> + struct siw_qp *qp = to_siw_qp(base_qp); >>> + >>> + siw_dbg_qp(qp, "get user reference\n"); >>> + siw_qp_get(qp); >>> +} >>> + >>> +void siw_qp_put_ref(struct ib_qp *base_qp) >>> +{ >>> + struct siw_qp *qp = to_siw_qp(base_qp); >>> + >>> + siw_dbg_qp(qp, "put user reference\n"); >>> + siw_qp_put(qp); >>> +} >>> + >>> +/* >>> + * siw_create_qp() >>> + * >>> + * Create QP of requested size on given device. >>> + * >>> + * @base_pd: Base PD contained in siw PD >>> + * @attrs: Initial QP attributes. >>> + * @udata: used to provide QP ID, SQ and RQ size back to user. 
>>> + */ >>> + >>> +struct ib_qp *siw_create_qp(struct ib_pd *base_pd, >>> + struct ib_qp_init_attr *attrs, >>> + struct ib_udata *udata) >>> +{ >>> + struct siw_qp *qp = NULL; >>> + struct siw_pd *pd = to_siw_pd(base_pd); >>> + struct ib_device *base_dev = base_pd->device; >>> + struct siw_device *sdev = to_siw_dev(base_dev); >>> + struct siw_cq *scq = NULL, *rcq = NULL; >>> + >>> + unsigned long flags; >>> + int num_sqe, num_rqe, rv = 0; >>> + >>> + siw_dbg(sdev, "create new qp\n"); >>> + >>> + if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) { >>> + siw_dbg(sdev, "too many qp's\n"); >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + if (attrs->qp_type != IB_QPT_RC) { >>> + siw_dbg(sdev, "only rc qp's supported\n"); >>> + rv = -EINVAL; >>> + goto err_out; >>> + } >>> + if ((attrs->cap.max_send_wr > SIW_MAX_QP_WR) || >>> + (attrs->cap.max_recv_wr > SIW_MAX_QP_WR) || >>> + (attrs->cap.max_send_sge > SIW_MAX_SGE) || >>> + (attrs->cap.max_recv_sge > SIW_MAX_SGE)) { >>> + siw_dbg(sdev, "qp size error\n"); >>> + rv = -EINVAL; >>> + goto err_out; >>> + } >>> + if (attrs->cap.max_inline_data > SIW_MAX_INLINE) { >>> + siw_dbg(sdev, "max inline send: %d > %d\n", >>> + attrs->cap.max_inline_data, (int)SIW_MAX_INLINE); >>> + rv = -EINVAL; >>> + goto err_out; >>> + } >>> + /* >>> + * NOTE: we allow for zero element SQ and RQ WQE's SGL's >>> + * but not for a QP unable to hold any WQE (SQ + RQ) >>> + */ >>> + if (attrs->cap.max_send_wr + attrs->cap.max_recv_wr == 0) { >>> + siw_dbg(sdev, "qp must have send or receive queue\n"); >>> + rv = -EINVAL; >>> + goto err_out; >>> + } >>> + >>> + scq = siw_cq_id2obj(sdev, ((struct siw_cq >> *)attrs->send_cq)->hdr.id); >>> + rcq = siw_cq_id2obj(sdev, ((struct siw_cq >> *)attrs->recv_cq)->hdr.id); >>> + >>> + if (!scq || (!rcq && !attrs->srq)) { >>> + siw_dbg(sdev, "send cq or receive cq invalid\n"); >>> + rv = -EINVAL; >>> + goto err_out; >>> + } >>> + qp = kzalloc(sizeof(*qp), GFP_KERNEL); >>> + if (!qp) { >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + >>> + init_rwsem(&qp->state_lock); >>> + spin_lock_init(&qp->sq_lock); >>> + spin_lock_init(&qp->rq_lock); >>> + spin_lock_init(&qp->orq_lock); >>> + >>> + if (!base_pd->uobject) >> >> This should be tested using 'if (udata)'. >> > okay, will do > >>> + qp->kernel_verbs = 1; >>> + >>> + rv = siw_qp_add(sdev, qp); >>> + if (rv) >>> + goto err_out; >>> + >>> + num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr); >>> + num_rqe = roundup_pow_of_two(attrs->cap.max_recv_wr); >>> + >>> + if (qp->kernel_verbs) >>> + qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe)); >>> + else >>> + qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe)); >>> + >>> + if (qp->sendq == NULL) { >>> + siw_dbg_qp(qp, "send queue size %d alloc failed\n", num_sqe); >>> + rv = -ENOMEM; >>> + goto err_out_idr; >>> + } >>> + if (attrs->sq_sig_type != IB_SIGNAL_REQ_WR) { >>> + if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) >>> + qp->attrs.flags |= SIW_SIGNAL_ALL_WR; >>> + else { >>> + rv = -EINVAL; >>> + goto err_out_idr; >>> + } >>> + } >>> + qp->pd = pd; >>> + qp->scq = scq; >>> + qp->rcq = rcq; >>> + >>> + if (attrs->srq) { >>> + /* >>> + * SRQ support. 
>>> + * Verbs 6.3.7: ignore RQ size, if SRQ present >>> + * Verbs 6.3.5: do not check PD of SRQ against PD of QP >>> + */ >>> + qp->srq = to_siw_srq(attrs->srq); >>> + qp->attrs.rq_size = 0; >>> + siw_dbg_qp(qp, "[SRQ 0x%p] attached\n", qp->srq); >>> + } else if (num_rqe) { >>> + if (qp->kernel_verbs) >>> + qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe)); >>> + else >>> + qp->recvq = vmalloc_user(num_rqe * >>> + sizeof(struct siw_rqe)); >>> + >>> + if (qp->recvq == NULL) { >>> + siw_dbg_qp(qp, "recv queue size %d alloc failed\n", >>> + num_rqe); >>> + rv = -ENOMEM; >>> + goto err_out_idr; >>> + } >>> + >>> + qp->attrs.rq_size = num_rqe; >>> + } >>> + qp->attrs.sq_size = num_sqe; >>> + qp->attrs.sq_max_sges = attrs->cap.max_send_sge; >>> + qp->attrs.rq_max_sges = attrs->cap.max_recv_sge; >>> + >>> + /* Make those two tunables fixed for now. */ >>> + qp->tx_ctx.gso_seg_limit = gso_seg_limit; >>> + qp->tx_ctx.zcopy_tx = zcopy_tx; >>> + >>> + qp->attrs.state = SIW_QP_STATE_IDLE; >>> + >>> + if (udata) { >>> + struct siw_uresp_create_qp uresp; >>> + struct siw_ucontext *ctx; >>> + >>> + memset(&uresp, 0, sizeof(uresp)); >>> + ctx = to_siw_ctx(base_pd->uobject->context); >>> + >>> + uresp.sq_key = uresp.rq_key = SIW_INVAL_UOBJ_KEY; >>> + uresp.num_sqe = num_sqe; >>> + uresp.num_rqe = num_rqe; >>> + uresp.qp_id = QP_ID(qp); >>> + >>> + if (qp->sendq) { >>> + uresp.sq_key = siw_insert_uobj(ctx, qp->sendq, >>> + num_sqe * sizeof(struct siw_sqe)); >>> + if (uresp.sq_key > SIW_MAX_UOBJ_KEY) >>> + siw_dbg_qp(qp, "preparing mmap sq failed\n"); >>> + } >>> + if (qp->recvq) { >>> + uresp.rq_key = siw_insert_uobj(ctx, qp->recvq, >>> + num_rqe * sizeof(struct siw_rqe)); >>> + if (uresp.rq_key > SIW_MAX_UOBJ_KEY) >>> + siw_dbg_qp(qp, "preparing mmap rq failed\n"); >>> + } >>> + rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); >>> + if (rv) >>> + goto err_out_idr; >>> + } >>> + qp->tx_cpu = siw_get_tx_cpu(sdev); >>> + if (qp->tx_cpu < 0) { >>> + rv = -EINVAL; >>> + goto err_out_idr; >>> + } >>> + qp->base_qp.qp_num = QP_ID(qp); >>> + >>> + siw_pd_get(pd); >>> + >>> + INIT_LIST_HEAD(&qp->devq); >>> + spin_lock_irqsave(&sdev->lock, flags); >>> + list_add_tail(&qp->devq, &sdev->qp_list); >>> + spin_unlock_irqrestore(&sdev->lock, flags); >>> + >>> + return &qp->base_qp; >>> + >>> +err_out_idr: >>> + siw_remove_obj(&sdev->lock, &sdev->qp_idr, &qp->hdr); >>> +err_out: >>> + if (scq) >>> + siw_cq_put(scq); >>> + if (rcq) >>> + siw_cq_put(rcq); >>> + >>> + if (qp) { >>> + if (qp->sendq) >>> + vfree(qp->sendq); >>> + if (qp->recvq) >>> + vfree(qp->recvq); >>> + kfree(qp); >>> + } >>> + atomic_dec(&sdev->num_qp); >>> + >>> + return ERR_PTR(rv); >>> +} >>> + >>> +/* >>> + * Minimum siw_query_qp() verb interface. 
>>> + * >>> + * @qp_attr_mask is not used but all available information is >> provided >>> + */ >>> +int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr >> *qp_attr, >>> + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) >>> +{ >>> + struct siw_qp *qp; >>> + struct siw_device *sdev; >>> + >>> + if (base_qp && qp_attr && qp_init_attr) { >>> + qp = to_siw_qp(base_qp); >>> + sdev = to_siw_dev(base_qp->device); >>> + } else >>> + return -EINVAL; >>> + >>> + qp_attr->cap.max_inline_data = SIW_MAX_INLINE; >>> + qp_attr->cap.max_send_wr = qp->attrs.sq_size; >>> + qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges; >>> + qp_attr->cap.max_recv_wr = qp->attrs.rq_size; >>> + qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges; >>> + qp_attr->path_mtu = siw_mtu_net2base(sdev->netdev->mtu); >>> + qp_attr->max_rd_atomic = qp->attrs.irq_size; >>> + qp_attr->max_dest_rd_atomic = qp->attrs.orq_size; >>> + >>> + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | >>> + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; >>> + >>> + qp_init_attr->qp_type = base_qp->qp_type; >>> + qp_init_attr->send_cq = base_qp->send_cq; >>> + qp_init_attr->recv_cq = base_qp->recv_cq; >>> + qp_init_attr->srq = base_qp->srq; >>> + >>> + qp_init_attr->cap = qp_attr->cap; >>> + >>> + return 0; >>> +} >>> + >>> +int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr >> *attr, >>> + int attr_mask, struct ib_udata *udata) >>> +{ >>> + struct siw_qp_attrs new_attrs; >>> + enum siw_qp_attr_mask siw_attr_mask = 0; >>> + struct siw_qp *qp = to_siw_qp(base_qp); >>> + int rv = 0; >>> + >>> + if (!attr_mask) >>> + return 0; >>> + >>> + memset(&new_attrs, 0, sizeof(new_attrs)); >>> + >>> + if (attr_mask & IB_QP_ACCESS_FLAGS) { >>> + >>> + siw_attr_mask = SIW_QP_ATTR_ACCESS_FLAGS; >>> + >>> + if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) >>> + new_attrs.flags |= SIW_RDMA_READ_ENABLED; >>> + if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) >>> + new_attrs.flags |= SIW_RDMA_WRITE_ENABLED; >>> + if (attr->qp_access_flags & IB_ACCESS_MW_BIND) >>> + new_attrs.flags |= SIW_RDMA_BIND_ENABLED; >>> + } >>> + if (attr_mask & IB_QP_STATE) { >>> + siw_dbg_qp(qp, "desired ib qp state: %s\n", >>> + ib_qp_state_to_string[attr->qp_state]); >>> + >>> + new_attrs.state = ib_qp_state_to_siw_qp_state[attr->qp_state]; >>> + >>> + if (new_attrs.state > SIW_QP_STATE_RTS) >>> + qp->tx_ctx.tx_suspend = 1; >>> + >>> + siw_attr_mask |= SIW_QP_ATTR_STATE; >>> + } >>> + if (!attr_mask) >>> + goto out; >> >> Remove, already checked at the beginning of the function. >> > good catch, thanks! should be (!siw_attr_mask) here.... Oh, now that makes sense. Shouldn't you return some kind of error (-EOPNOTSUPP?) in case the user requested for something you don't support? 
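E.g. something like this at the top of the function, assuming only IB_QP_STATE and IB_QP_ACCESS_FLAGS are meant to be supported (just a sketch):

	if (attr_mask & ~(IB_QP_STATE | IB_QP_ACCESS_FLAGS))
		return -EOPNOTSUPP;

That way unsupported mask bits are reported back to the caller instead of being silently dropped.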
> >>> + >>> + down_write(&qp->state_lock); >>> + >>> + rv = siw_qp_modify(qp, &new_attrs, siw_attr_mask); >>> + >>> + up_write(&qp->state_lock); >>> +out: >>> + return rv; >>> +} >>> + >>> +int siw_destroy_qp(struct ib_qp *base_qp) >>> +{ >>> + struct siw_qp *qp = to_siw_qp(base_qp); >>> + struct siw_qp_attrs qp_attrs; >>> + >>> + siw_dbg_qp(qp, "state %d, cep 0x%p\n", qp->attrs.state, qp->cep); >>> + >>> + /* >>> + * Mark QP as in process of destruction to prevent from >>> + * any async callbacks to RDMA core >>> + */ >>> + qp->attrs.flags |= SIW_QP_IN_DESTROY; >>> + qp->rx_ctx.rx_suspend = 1; >>> + >>> + down_write(&qp->state_lock); >>> + >>> + qp_attrs.state = SIW_QP_STATE_ERROR; >>> + (void)siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE); >>> + >>> + if (qp->cep) { >>> + siw_cep_put(qp->cep); >>> + qp->cep = NULL; >>> + } >>> + >>> + up_write(&qp->state_lock); >>> + >>> + kfree(qp->rx_ctx.mpa_crc_hd); >>> + kfree(qp->tx_ctx.mpa_crc_hd); >>> + >>> + /* Drop references */ >>> + siw_cq_put(qp->scq); >>> + siw_cq_put(qp->rcq); >>> + siw_pd_put(qp->pd); >>> + qp->scq = qp->rcq = NULL; >>> + >>> + siw_qp_put(qp); >>> + >>> + return 0; >>> +} >>> + >>> +/* >>> + * siw_copy_sgl() >>> + * >>> + * Copy SGL from RDMA core representation to local >>> + * representation. >>> + */ >>> +static inline void siw_copy_sgl(struct ib_sge *sge, struct siw_sge >> *siw_sge, >>> + int num_sge) >>> +{ >>> + while (num_sge--) { >>> + siw_sge->laddr = sge->addr; >>> + siw_sge->length = sge->length; >>> + siw_sge->lkey = sge->lkey; >>> + >>> + siw_sge++; sge++; >> >> Split to two lines please. >> > yes. > >>> + } >>> +} >>> + >>> +/* >>> + * siw_copy_inline_sgl() >>> + * >>> + * Prepare sgl of inlined data for sending. For userland callers >>> + * function checks if given buffer addresses and len's are within >>> + * process context bounds. >>> + * Data from all provided sge's are copied together into the wqe, >>> + * referenced by a single sge. >>> + */ >>> +static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, >>> + struct siw_sqe *sqe) >>> +{ >>> + struct ib_sge *core_sge = core_wr->sg_list; >>> + void *kbuf = &sqe->sge[1]; >>> + int num_sge = core_wr->num_sge, >>> + bytes = 0; >>> + >>> + sqe->sge[0].laddr = (u64)kbuf; >>> + sqe->sge[0].lkey = 0; >>> + >>> + while (num_sge--) { >>> + if (!core_sge->length) { >>> + core_sge++; >>> + continue; >>> + } >>> + bytes += core_sge->length; >>> + if (bytes > SIW_MAX_INLINE) { >>> + bytes = -EINVAL; >>> + break; >>> + } >>> + memcpy(kbuf, (void *)(uintptr_t)core_sge->addr, >>> + core_sge->length); >>> + >>> + kbuf += core_sge->length; >>> + core_sge++; >>> + } >>> + sqe->sge[0].length = bytes > 0 ? bytes : 0; >>> + sqe->num_sge = bytes > 0 ? 1 : 0; >>> + >>> + return bytes; >>> +} >>> + >>> +/* >>> + * siw_post_send() >>> + * >>> + * Post a list of S-WR's to a SQ. >>> + * >>> + * @base_qp: Base QP contained in siw QP >>> + * @wr: Null terminated list of user WR's >>> + * @bad_wr: Points to failing WR in case of synchronous failure. >>> + */ >>> +int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr >> *wr, >>> + const struct ib_send_wr **bad_wr) >>> +{ >>> + struct siw_qp *qp = to_siw_qp(base_qp); >>> + struct siw_wqe *wqe = tx_wqe(qp); >>> + >>> + unsigned long flags; >>> + int rv = 0; >>> + >>> + siw_dbg_qp(qp, "state %d\n", qp->attrs.state); >>> + >>> + /* >>> + * Try to acquire QP state lock. Must be non-blocking >>> + * to accommodate kernel clients needs. 
>>> + */ >>> + if (!down_read_trylock(&qp->state_lock)) { >>> + *bad_wr = wr; >>> + return -ENOTCONN; >>> + } >>> + >>> + if (unlikely(qp->attrs.state != SIW_QP_STATE_RTS)) { >>> + up_read(&qp->state_lock); >>> + *bad_wr = wr; >>> + return -ENOTCONN; >>> + } >>> + if (wr && !qp->kernel_verbs) { >>> + siw_dbg_qp(qp, "wr must be empty for user mapped sq\n"); >>> + up_read(&qp->state_lock); >>> + *bad_wr = wr; >>> + return -EINVAL; >>> + } >>> + >>> + spin_lock_irqsave(&qp->sq_lock, flags); >>> + >>> + while (wr) { >>> + u32 idx = qp->sq_put % qp->attrs.sq_size; >>> + struct siw_sqe *sqe = &qp->sendq[idx]; >>> + >>> + if (sqe->flags) { >>> + siw_dbg_qp(qp, "sq full\n"); >>> + rv = -ENOMEM; >>> + break; >>> + } >>> + if (wr->num_sge > qp->attrs.sq_max_sges) { >>> + siw_dbg_qp(qp, "too many sge's: %d\n", wr->num_sge); >>> + rv = -EINVAL; >>> + break; >>> + } >>> + sqe->id = wr->wr_id; >>> + >>> + if ((wr->send_flags & IB_SEND_SIGNALED) || >>> + (qp->attrs.flags & SIW_SIGNAL_ALL_WR)) >>> + sqe->flags |= SIW_WQE_SIGNALLED; >>> + >>> + if (wr->send_flags & IB_SEND_FENCE) >>> + sqe->flags |= SIW_WQE_READ_FENCE; >>> + >>> + switch (wr->opcode) { >>> + >>> + case IB_WR_SEND: >>> + case IB_WR_SEND_WITH_INV: >>> + if (wr->send_flags & IB_SEND_SOLICITED) >>> + sqe->flags |= SIW_WQE_SOLICITED; >>> + >>> + if (!(wr->send_flags & IB_SEND_INLINE)) { >>> + siw_copy_sgl(wr->sg_list, sqe->sge, >>> + wr->num_sge); >>> + sqe->num_sge = wr->num_sge; >>> + } else { >>> + rv = siw_copy_inline_sgl(wr, sqe); >>> + if (rv <= 0) { >>> + rv = -EINVAL; >>> + break; >>> + } >>> + sqe->flags |= SIW_WQE_INLINE; >>> + sqe->num_sge = 1; >>> + } >>> + if (wr->opcode == IB_WR_SEND) >>> + sqe->opcode = SIW_OP_SEND; >>> + else { >>> + sqe->opcode = SIW_OP_SEND_REMOTE_INV; >>> + sqe->rkey = wr->ex.invalidate_rkey; >>> + } >>> + break; >>> + >>> + case IB_WR_RDMA_READ_WITH_INV: >>> + case IB_WR_RDMA_READ: >>> + /* >>> + * OFED WR restricts RREAD sink to SGL containing >>> + * 1 SGE only. we could relax to SGL with multiple >>> + * elements referring the SAME ltag or even sending >>> + * a private per-rreq tag referring to a checked >>> + * local sgl with MULTIPLE ltag's. would be easy >>> + * to do... >>> + */ >>> + if (unlikely(wr->num_sge != 1)) { >>> + rv = -EINVAL; >>> + break; >>> + } >>> + siw_copy_sgl(wr->sg_list, &sqe->sge[0], 1); >>> + /* >>> + * NOTE: zero length RREAD is allowed! 
>>> + */ >>> + sqe->raddr = rdma_wr(wr)->remote_addr; >>> + sqe->rkey = rdma_wr(wr)->rkey; >>> + sqe->num_sge = 1; >>> + >>> + if (wr->opcode == IB_WR_RDMA_READ) >>> + sqe->opcode = SIW_OP_READ; >>> + else >>> + sqe->opcode = SIW_OP_READ_LOCAL_INV; >>> + break; >>> + >>> + case IB_WR_RDMA_WRITE: >>> + if (!(wr->send_flags & IB_SEND_INLINE)) { >>> + siw_copy_sgl(wr->sg_list, &sqe->sge[0], >>> + wr->num_sge); >>> + sqe->num_sge = wr->num_sge; >>> + } else { >>> + rv = siw_copy_inline_sgl(wr, sqe); >>> + if (unlikely(rv < 0)) { >>> + rv = -EINVAL; >>> + break; >>> + } >>> + sqe->flags |= SIW_WQE_INLINE; >>> + sqe->num_sge = 1; >>> + } >>> + sqe->raddr = rdma_wr(wr)->remote_addr; >>> + sqe->rkey = rdma_wr(wr)->rkey; >>> + sqe->opcode = SIW_OP_WRITE; >>> + >>> + break; >>> + >>> + case IB_WR_REG_MR: >>> + sqe->base_mr = (uint64_t)reg_wr(wr)->mr; >>> + sqe->rkey = reg_wr(wr)->key; >>> + sqe->access = SIW_MEM_LREAD; >>> + if (reg_wr(wr)->access & IB_ACCESS_LOCAL_WRITE) >>> + sqe->access |= SIW_MEM_LWRITE; >>> + if (reg_wr(wr)->access & IB_ACCESS_REMOTE_WRITE) >>> + sqe->access |= SIW_MEM_RWRITE; >>> + if (reg_wr(wr)->access & IB_ACCESS_REMOTE_READ) >>> + sqe->access |= SIW_MEM_RREAD; >>> + sqe->opcode = SIW_OP_REG_MR; >>> + >>> + break; >>> + >>> + case IB_WR_LOCAL_INV: >>> + sqe->rkey = wr->ex.invalidate_rkey; >>> + sqe->opcode = SIW_OP_INVAL_STAG; >>> + >>> + break; >>> + >>> + default: >>> + siw_dbg_qp(qp, "ib wr type %d unsupported\n", >>> + wr->opcode); >>> + rv = -EINVAL; >>> + break; >>> + } >>> + siw_dbg_qp(qp, "opcode %d, flags 0x%x\n", >>> + sqe->opcode, sqe->flags); >>> + >>> + if (unlikely(rv < 0)) >>> + break; >>> + >>> + /* make SQE only vaild after completely written */ >>> + smp_wmb(); >>> + sqe->flags |= SIW_WQE_VALID; >>> + >>> + qp->sq_put++; >>> + wr = wr->next; >>> + } >>> + >>> + /* >>> + * Send directly if SQ processing is not in progress. >>> + * Eventual immediate errors (rv < 0) do not affect the involved >>> + * RI resources (Verbs, 8.3.1) and thus do not prevent from SQ >>> + * processing, if new work is already pending. But rv must be >> passed >>> + * to caller. >>> + */ >>> + if (wqe->wr_status != SIW_WR_IDLE) { >>> + spin_unlock_irqrestore(&qp->sq_lock, flags); >>> + goto skip_direct_sending; >>> + } >>> + rv = siw_activate_tx(qp); >>> + spin_unlock_irqrestore(&qp->sq_lock, flags); >>> + >>> + if (rv <= 0) >>> + goto skip_direct_sending; >>> + >>> + if (qp->kernel_verbs) { >>> + rv = siw_sq_start(qp); >>> + } else { >>> + qp->tx_ctx.in_syscall = 1; >>> + >>> + if (siw_qp_sq_process(qp) != 0 && !(qp->tx_ctx.tx_suspend)) >>> + siw_qp_cm_drop(qp, 0); >>> + >>> + qp->tx_ctx.in_syscall = 0; >>> + } >>> +skip_direct_sending: >>> + >>> + up_read(&qp->state_lock); >>> + >>> + if (rv >= 0) >>> + return 0; >>> + /* >>> + * Immediate error >>> + */ >>> + siw_dbg_qp(qp, "error %d\n", rv); >>> + >>> + *bad_wr = wr; >>> + return rv; >>> +} >>> + >>> +/* >>> + * siw_post_receive() >>> + * >>> + * Post a list of R-WR's to a RQ. >>> + * >>> + * @base_qp: Base QP contained in siw QP >>> + * @wr: Null terminated list of user WR's >>> + * @bad_wr: Points to failing WR in case of synchronous failure. >>> + */ >>> +int siw_post_receive(struct ib_qp *base_qp, const struct >> ib_recv_wr *wr, >>> + const struct ib_recv_wr **bad_wr) >>> +{ >>> + struct siw_qp *qp = to_siw_qp(base_qp); >> >> There's a tab instead of a space between siq_qp and *qp. > ok, will fix. 
> >> >>> + unsigned long flags; >>> + int rv = 0; >>> + >>> + if (qp->srq) { >>> + *bad_wr = wr; >>> + return -EOPNOTSUPP; /* what else from errno.h? */ >>> + } >>> + /* >>> + * Try to acquire QP state lock. Must be non-blocking >>> + * to accommodate kernel clients needs. >>> + */ >>> + if (!down_read_trylock(&qp->state_lock)) { >> >> Perhaps this rwsemlock should be replaced with a spinlock. >> > > I use semaphores and trylock for the QP to allow for > concurrent rx and tx activity as long as the QP state > permits. A spinlock on the tx side would mute the rx side and > vice versa. On the other hand, QP state changes must wait until > getting the write_lock to change state, so after current rx or > tx has finished. > > If the read_trylock fails, I also know someone _is_ currently > changing the QP state, which means this QP goes into a state > after RTS and there is no point in doing tx / rx. > > >>> + *bad_wr = wr; >>> + return -ENOTCONN; >>> + } >>> + if (!qp->kernel_verbs) { >>> + siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n"); >>> + up_read(&qp->state_lock); >>> + *bad_wr = wr; >>> + return -EINVAL; >>> + } >>> + if (qp->attrs.state > SIW_QP_STATE_RTS) { >>> + up_read(&qp->state_lock); >>> + *bad_wr = wr; >>> + return -EINVAL; >>> + } >>> + /* >>> + * Serialize potentially multiple producers. >>> + * Not needed for single threaded consumer side. >>> + */ >>> + spin_lock_irqsave(&qp->rq_lock, flags); >>> + >>> + while (wr) { >>> + u32 idx = qp->rq_put % qp->attrs.rq_size; >>> + struct siw_rqe *rqe = &qp->recvq[idx]; >>> + >>> + if (rqe->flags) { >>> + siw_dbg_qp(qp, "Receive Queue full\n"); >>> + rv = -ENOMEM; >>> + break; >>> + } >>> + if (wr->num_sge > qp->attrs.rq_max_sges) { >>> + siw_dbg_qp(qp, "too many sge's: %d\n", wr->num_sge); >>> + rv = -EINVAL; >>> + break; >>> + } >>> + rqe->id = wr->wr_id; >>> + rqe->num_sge = wr->num_sge; >>> + siw_copy_sgl(wr->sg_list, rqe->sge, wr->num_sge); >>> + >>> + /* make sure RQE is completely written before valid */ >>> + smp_wmb(); >>> + >>> + rqe->flags = SIW_WQE_VALID; >>> + >>> + qp->rq_put++; >>> + wr = wr->next; >>> + } >>> + spin_unlock_irqrestore(&qp->rq_lock, flags); >>> + >>> + up_read(&qp->state_lock); >>> + >>> + if (rv < 0) { >>> + siw_dbg_qp(qp, "error %d\n", rv); >>> + *bad_wr = wr; >>> + } >>> + return rv > 0 ? 0 : rv; >>> +} >>> + >>> +int siw_destroy_cq(struct ib_cq *base_cq) >>> +{ >>> + struct siw_cq *cq = to_siw_cq(base_cq); >>> + struct ib_device *base_dev = base_cq->device; >>> + struct siw_device *sdev = to_siw_dev(base_dev); >>> + >>> + siw_cq_flush(cq); >>> + >>> + siw_remove_obj(&sdev->lock, &sdev->cq_idr, &cq->hdr); >>> + siw_cq_put(cq); >>> + >>> + return 0; >>> +} >>> + >>> +/* >>> + * siw_create_cq() >>> + * >>> + * Create CQ of requested size on given device. >>> + * >>> + * @base_dev: RDMA device contained in siw device >>> + * @size: maximum number of CQE's allowed. >>> + * @ib_context: user context. >>> + * @udata: used to provide CQ ID back to user. >>> + */ >>> + >>> +struct ib_cq *siw_create_cq(struct ib_device *base_dev, >>> + const struct ib_cq_init_attr *attr, >>> + struct ib_ucontext *ib_context, >>> + struct ib_udata *udata) >>> +{ >>> + struct siw_cq *cq = NULL; >>> + struct siw_device *sdev = to_siw_dev(base_dev); >>> + struct siw_uresp_create_cq uresp; >>> + int rv, size = attr->cqe; >>> + >>> + if (!base_dev) { >>> + rv = -ENODEV; >>> + goto err_out; >>> + } >> >> Is this really needed? >> > I hope not.
;) > >>> + if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) { >>> + siw_dbg(sdev, "too many cq's\n"); >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + if (size < 1 || size > sdev->attrs.max_cqe) { >>> + siw_dbg(sdev, "cq size error: %d\n", size); >>> + rv = -EINVAL; >>> + goto err_out; >>> + } >>> + cq = kzalloc(sizeof(*cq), GFP_KERNEL); >>> + if (!cq) { >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + size = roundup_pow_of_two(size); >>> + cq->base_cq.cqe = size; >>> + cq->num_cqe = size; >> >> No reason to have another field for num_cqe, just use base_cq.cqe. > > We had this discussion before (for the kernel_verbs flag). This > is a software only driver. It does not load parameters into hardware > (such as queue sizes), but it wants to keep those handy. So > I want to keep the drivers fast path context dense. On fast path operations, > I do not want to reach out to the beginning of an ib_cq to fetch > some parameters, which is many bytes away from my cached context. > I could hide the queue size within a siw private queue structure, like > rxe does it, but I just have it explicit. > >> >>> + >>> + if (!ib_context) { >>> + cq->kernel_verbs = 1; >>> + cq->queue = vzalloc(size * sizeof(struct siw_cqe) >>> + + sizeof(struct siw_cq_ctrl)); >>> + } else { >>> + cq->queue = vmalloc_user(size * sizeof(struct siw_cqe) >>> + + sizeof(struct siw_cq_ctrl)); >>> + } >>> + if (cq->queue == NULL) { >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + >>> + rv = siw_cq_add(sdev, cq); >>> + if (rv) >>> + goto err_out; >>> + >>> + spin_lock_init(&cq->lock); >>> + >>> + cq->notify = &((struct siw_cq_ctrl *)&cq->queue[size])->notify; >>> + >>> + if (!cq->kernel_verbs) { >>> + struct siw_ucontext *ctx = to_siw_ctx(ib_context); >>> + >>> + uresp.cq_key = siw_insert_uobj(ctx, cq->queue, >>> + size * sizeof(struct siw_cqe) + >>> + sizeof(struct siw_cq_ctrl)); >>> + >>> + if (uresp.cq_key > SIW_MAX_UOBJ_KEY) >>> + siw_dbg(sdev, "[CQ %d]: preparing mmap failed\n", >>> + OBJ_ID(cq)); >>> + >>> + uresp.cq_id = OBJ_ID(cq); >>> + uresp.num_cqe = size; >>> + >>> + rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); >>> + if (rv) >>> + goto err_out_idr; >>> + } >>> + return &cq->base_cq; >>> + >>> +err_out_idr: >>> + siw_remove_obj(&sdev->lock, &sdev->cq_idr, &cq->hdr); >>> +err_out: >>> + siw_dbg(sdev, "cq creation failed: %d", rv); >>> + >>> + if (cq && cq->queue) >>> + vfree(cq->queue); >>> + >>> + kfree(cq); >>> + atomic_dec(&sdev->num_cq); >>> + >>> + return ERR_PTR(rv); >>> +} >>> + >>> +/* >>> + * siw_poll_cq() >>> + * >>> + * Reap CQ entries if available and copy work completion status >> into >>> + * array of WC's provided by caller. Returns number of reaped >> CQE's. >>> + * >>> + * @base_cq: Base CQ contained in siw CQ. >>> + * @num_cqe: Maximum number of CQE's to reap. >>> + * @wc: Array of work completions to be filled by siw. >>> + */ >>> +int siw_poll_cq(struct ib_cq *base_cq, int num_cqe, struct ib_wc >> *wc) >>> +{ >>> + struct siw_cq *cq = to_siw_cq(base_cq); >>> + int i; >>> + >>> + for (i = 0; i < num_cqe; i++) { >>> + if (!(siw_reap_cqe(cq, wc))) >> >> Extra paranthesis. >> > right, will fix that. > >>> + break; >>> + wc++; >>> + } >>> + return i; >>> +} >>> + >>> +/* >>> + * siw_req_notify_cq() >>> + * >>> + * Request notification for new CQE's added to that CQ. 
>>> + * Defined flags: >>> + * o SIW_CQ_NOTIFY_SOLICITED lets siw trigger a notification >>> + * event if a WQE with notification flag set enters the CQ >>> + * o SIW_CQ_NOTIFY_NEXT_COMP lets siw trigger a notification >>> + * event if a WQE enters the CQ. >>> + * o IB_CQ_REPORT_MISSED_EVENTS: return value will provide the >>> + * number of not reaped CQE's regardless of its notification >>> + * type and current or new CQ notification settings. >>> + * >>> + * @base_cq: Base CQ contained in siw CQ. >>> + * @flags: Requested notification flags. >>> + */ >>> +int siw_req_notify_cq(struct ib_cq *base_cq, enum >> ib_cq_notify_flags flags) >>> +{ >>> + struct siw_cq *cq = to_siw_cq(base_cq); >>> + >>> + siw_dbg(cq->hdr.sdev, "[CQ %d]: flags: 0x%8x\n", OBJ_ID(cq), >> flags); >>> + >>> + if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) >>> + /* CQ event for next solicited completion */ >>> + smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED); >>> + else >>> + /* CQ event for any signalled completion */ >>> + smp_store_mb(*cq->notify, SIW_NOTIFY_ALL); >>> + >>> + if (flags & IB_CQ_REPORT_MISSED_EVENTS) >>> + return cq->cq_put - cq->cq_get; >>> + >>> + return 0; >>> +} >>> + >>> +/* >>> + * siw_dereg_mr() >>> + * >>> + * Release Memory Region. >>> + * >>> + * TODO: Update function if Memory Windows are supported by siw: >>> + * Is OFED core checking for MW dependencies for current >>> + * MR before calling MR deregistration?. >>> + * >>> + * @base_mr: Base MR contained in siw MR. >>> + */ >>> +int siw_dereg_mr(struct ib_mr *base_mr) >>> +{ >>> + struct siw_mr *mr; >>> + struct siw_device *sdev = to_siw_dev(base_mr->device); >>> + >>> + mr = to_siw_mr(base_mr); >>> + >>> + siw_dbg(sdev, "[MEM %d]: deregister mr, #ref's %d\n", >>> + mr->mem.hdr.id, kref_read(&mr->mem.hdr.ref)); >>> + >>> + mr->mem.stag_valid = 0; >>> + >>> + siw_remove_obj(&sdev->lock, &sdev->mem_idr, &mr->mem.hdr); >>> + siw_mem_put(&mr->mem); >>> + >>> + return 0; >>> +} >>> + >>> +static struct siw_mr *siw_create_mr(struct siw_device *sdev, void >> *mem_obj, >>> + u64 start, u64 len, int rights) >>> +{ >>> + struct siw_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL); >>> + unsigned long flags; >>> + >>> + if (!mr) >>> + return NULL; >>> + >>> + mr->mem.stag_valid = 0; >>> + >>> + if (siw_mem_add(sdev, &mr->mem) < 0) { >>> + kfree(mr); >>> + return NULL; >>> + } >>> + siw_dbg(sdev, "[MEM %d]: new mr, object 0x%p\n", >>> + mr->mem.hdr.id, mem_obj); >>> + >>> + mr->base_mr.lkey = mr->base_mr.rkey = mr->mem.hdr.id << 8; >>> + >>> + mr->mem.va = start; >>> + mr->mem.len = len; >>> + mr->mem.mr = NULL; >>> + mr->mem.perms = SIW_MEM_LREAD | /* not selectable in RDMA core */ >>> + (rights & IB_ACCESS_REMOTE_READ ? SIW_MEM_RREAD : 0) | >>> + (rights & IB_ACCESS_LOCAL_WRITE ? SIW_MEM_LWRITE : 0) | >>> + (rights & IB_ACCESS_REMOTE_WRITE ? SIW_MEM_RWRITE : 0); >>> + >>> + mr->mem_obj = mem_obj; >>> + >>> + INIT_LIST_HEAD(&mr->devq); >>> + spin_lock_irqsave(&sdev->lock, flags); >>> + list_add_tail(&mr->devq, &sdev->mr_list); >>> + spin_unlock_irqrestore(&sdev->lock, flags); >>> + >>> + return mr; >>> +} >>> + >>> +/* >>> + * siw_reg_user_mr() >>> + * >>> + * Register Memory Region. >>> + * >>> + * @base_pd: Base PD contained in siw PD. >>> + * @start: starting address of MR (virtual address) >>> + * @len: len of MR >>> + * @rnic_va: not used by siw >>> + * @rights: MR access rights >>> + * @udata: user buffer to communicate STag and Key. 
>>> + */ >>> +struct ib_mr *siw_reg_user_mr(struct ib_pd *base_pd, u64 start, >> u64 len, >>> + u64 rnic_va, int rights, struct ib_udata *udata) >>> +{ >>> + struct siw_mr *mr = NULL; >>> + struct siw_pd *pd = to_siw_pd(base_pd); >>> + struct siw_umem *umem = NULL; >>> + struct siw_ureq_reg_mr ureq; >>> + struct siw_uresp_reg_mr uresp; >>> + struct siw_device *sdev = pd->hdr.sdev; >>> + >>> + unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK); >>> + int rv; >>> + >>> + siw_dbg(sdev, "[PD %d]: start: 0x%016llx, va: 0x%016llx, len: >> %llu\n", >>> + OBJ_ID(pd), (unsigned long long)start, >>> + (unsigned long long)rnic_va, (unsigned long long)len); >>> + >>> + if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) { >>> + siw_dbg(sdev, "[PD %d]: too many mr's\n", OBJ_ID(pd)); >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + if (!len) { >>> + rv = -EINVAL; >>> + goto err_out; >>> + } >>> + if (mem_limit != RLIM_INFINITY) { >>> + unsigned long num_pages = >>> + (PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT; >>> + mem_limit >>= PAGE_SHIFT; >>> + >>> + if (num_pages > mem_limit - current->mm->locked_vm) { >>> + siw_dbg(sdev, >>> + "[PD %d]: pages req %lu, max %lu, lock %lu\n", >>> + OBJ_ID(pd), num_pages, mem_limit, >>> + current->mm->locked_vm); >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + } >>> + umem = siw_umem_get(start, len); >>> + if (IS_ERR(umem)) { >>> + rv = PTR_ERR(umem); >>> + siw_dbg(sdev, "[PD %d]: getting user memory failed: %d\n", >>> + OBJ_ID(pd), rv); >>> + umem = NULL; >>> + goto err_out; >>> + } >>> + mr = siw_create_mr(sdev, umem, start, len, rights); >>> + if (!mr) { >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + if (udata) { >>> + rv = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); >>> + if (rv) >>> + goto err_out; >>> + >>> + mr->base_mr.lkey |= ureq.stag_key; >>> + mr->base_mr.rkey |= ureq.stag_key; >>> + uresp.stag = mr->base_mr.lkey; >>> + >>> + rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); >>> + if (rv) >>> + goto err_out; >>> + } >>> + mr->pd = pd; >>> + siw_pd_get(pd); >>> + >>> + mr->mem.stag_valid = 1; >>> + >>> + return &mr->base_mr; >>> + >>> +err_out: >>> + if (mr) { >>> + siw_remove_obj(&sdev->lock, &sdev->mem_idr, &mr->mem.hdr); >>> + siw_mem_put(&mr->mem); >>> + umem = NULL; >>> + } else >>> + atomic_dec(&sdev->num_mr); >>> + >>> + if (umem) >>> + siw_umem_release(umem); >>> + >>> + return ERR_PTR(rv); >>> +} >>> + >>> +struct ib_mr *siw_alloc_mr(struct ib_pd *base_pd, enum ib_mr_type >> mr_type, >>> + u32 max_sge) >>> +{ >>> + struct siw_mr *mr; >>> + struct siw_pd *pd = to_siw_pd(base_pd); >>> + struct siw_device *sdev = pd->hdr.sdev; >>> + struct siw_pbl *pbl = NULL; >>> + int rv; >>> + >>> + if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) { >>> + siw_dbg(sdev, "[PD %d]: too many mr's\n", OBJ_ID(pd)); >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + if (mr_type != IB_MR_TYPE_MEM_REG) { >>> + siw_dbg(sdev, "[PD %d]: mr type %d unsupported\n", >>> + OBJ_ID(pd), mr_type); >>> + rv = -EOPNOTSUPP; >>> + goto err_out; >>> + } >>> + if (max_sge > SIW_MAX_SGE_PBL) { >>> + siw_dbg(sdev, "[PD %d]: too many sge's: %d\n", >>> + OBJ_ID(pd), max_sge); >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + pbl = siw_pbl_alloc(max_sge); >>> + if (IS_ERR(pbl)) { >>> + rv = PTR_ERR(pbl); >>> + siw_dbg(sdev, "[PD %d]: pbl allocation failed: %d\n", >>> + OBJ_ID(pd), rv); >>> + pbl = NULL; >>> + goto err_out; >>> + } >>> + mr = siw_create_mr(sdev, pbl, 0, max_sge * PAGE_SIZE, 0); >>> + if (!mr) { >>> + rv = -ENOMEM; >>> + goto err_out; >>> 
+ } >>> + mr->mem.is_pbl = 1; >>> + mr->pd = pd; >>> + siw_pd_get(pd); >>> + >>> + siw_dbg(sdev, "[PD %d], [MEM %d]: success\n", >>> + OBJ_ID(pd), OBJ_ID(&mr->mem)); >>> + >>> + return &mr->base_mr; >>> + >>> +err_out: >>> + if (pbl) >>> + siw_pbl_free(pbl); >>> + >>> + siw_dbg(sdev, "[PD %d]: failed: %d\n", OBJ_ID(pd), rv); >>> + >>> + atomic_dec(&sdev->num_mr); >>> + >>> + return ERR_PTR(rv); >>> +} >>> + >>> +/* Just used to count number of pages being mapped */ >>> +static int siw_set_pbl_page(struct ib_mr *base_mr, u64 buf_addr) >>> +{ >>> + return 0; >>> +} >>> + >>> +int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, >> int num_sle, >>> + unsigned int *sg_off) >>> +{ >>> + struct scatterlist *slp; >>> + struct siw_mr *mr = to_siw_mr(base_mr); >>> + struct siw_pbl *pbl = mr->pbl; >>> + struct siw_pble *pble = pbl->pbe; >>> + u64 pbl_size; >>> + int i, rv; >>> + >>> + if (!pbl) { >> >> You already dereferenced pbl for pble assignment. >> > uuuhh, yes! > >>> + siw_dbg(mr->mem.hdr.sdev, "[MEM %d]: no pbl allocated\n", >>> + OBJ_ID(&mr->mem)); >>> + return -EINVAL; >>> + } >>> + if (pbl->max_buf < num_sle) { >>> + siw_dbg(mr->mem.hdr.sdev, "[MEM %d]: too many sge's: %d>%d\n", >>> + OBJ_ID(&mr->mem), mr->pbl->max_buf, num_sle); >>> + return -ENOMEM; >>> + } >>> + >>> + for_each_sg(sl, slp, num_sle, i) { >>> + if (sg_dma_len(slp) == 0) { >>> + siw_dbg(mr->mem.hdr.sdev, "[MEM %d]: empty sge\n", >>> + OBJ_ID(&mr->mem)); >>> + return -EINVAL; >>> + } >>> + if (i == 0) { >>> + pble->addr = sg_dma_address(slp); >>> + pble->size = sg_dma_len(slp); >>> + pble->pbl_off = 0; >>> + pbl_size = pble->size; >>> + pbl->num_buf = 1; >>> + >>> + continue; >>> + } >>> + /* Merge PBL entries if adjacent */ >>> + if (pble->addr + pble->size == sg_dma_address(slp)) >>> + pble->size += sg_dma_len(slp); >>> + else { >>> + pble++; >>> + pbl->num_buf++; >>> + pble->addr = sg_dma_address(slp); >>> + pble->size = sg_dma_len(slp); >>> + pble->pbl_off = pbl_size; >>> + } >>> + pbl_size += sg_dma_len(slp); >>> + >>> + siw_dbg(mr->mem.hdr.sdev, >>> + "[MEM %d]: sge[%d], size %llu, addr %p, total %llu\n", >>> + OBJ_ID(&mr->mem), i, pble->size, (void *)pble->addr, >>> + pbl_size); >>> + } >>> + rv = ib_sg_to_pages(base_mr, sl, num_sle, sg_off, >> siw_set_pbl_page); >>> + if (rv > 0) { >>> + mr->mem.len = base_mr->length; >>> + mr->mem.va = base_mr->iova; >>> + siw_dbg(mr->mem.hdr.sdev, >>> + "[MEM %d]: %llu byte, %u SLE into %u entries\n", >>> + OBJ_ID(&mr->mem), mr->mem.len, num_sle, pbl->num_buf); >>> + } >>> + return rv; >>> +} >>> + >>> +/* >>> + * siw_get_dma_mr() >>> + * >>> + * Create a (empty) DMA memory region, where no umem is attached. 
>>> + */ >>> +struct ib_mr *siw_get_dma_mr(struct ib_pd *base_pd, int rights) >>> +{ >>> + struct siw_mr *mr; >>> + struct siw_pd *pd = to_siw_pd(base_pd); >>> + struct siw_device *sdev = pd->hdr.sdev; >>> + int rv; >>> + >>> + if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) { >>> + siw_dbg(sdev, "[PD %d]: too many mr's\n", OBJ_ID(pd)); >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + mr = siw_create_mr(sdev, NULL, 0, ULONG_MAX, rights); >>> + if (!mr) { >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + mr->mem.stag_valid = 1; >>> + >>> + mr->pd = pd; >>> + siw_pd_get(pd); >>> + >>> + siw_dbg(sdev, "[PD %d], [MEM %d]: success\n", >>> + OBJ_ID(pd), OBJ_ID(&mr->mem)); >>> + >>> + return &mr->base_mr; >>> + >>> +err_out: >>> + atomic_dec(&sdev->num_mr); >>> + >>> + return ERR_PTR(rv); >>> +} >>> + >>> +/* >>> + * siw_create_srq() >>> + * >>> + * Create Shared Receive Queue of attributes @init_attrs >>> + * within protection domain given by @base_pd. >>> + * >>> + * @base_pd: Base PD contained in siw PD. >>> + * @init_attrs: SRQ init attributes. >>> + * @udata: not used by siw. >>> + */ >>> +struct ib_srq *siw_create_srq(struct ib_pd *base_pd, >>> + struct ib_srq_init_attr *init_attrs, >>> + struct ib_udata *udata) >>> +{ >>> + struct siw_srq *srq = NULL; >>> + struct ib_srq_attr *attrs = &init_attrs->attr; >>> + struct siw_pd *pd = to_siw_pd(base_pd); >>> + struct siw_device *sdev = pd->hdr.sdev; >>> + >>> + int kernel_verbs = base_pd->uobject ? 0 : 1; >>> + int rv; >>> + >>> + if (atomic_inc_return(&sdev->num_srq) > SIW_MAX_SRQ) { >>> + siw_dbg(sdev, "too many SRQ's\n"); >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + if (attrs->max_wr == 0 || attrs->max_wr > SIW_MAX_SRQ_WR || >>> + attrs->max_sge > SIW_MAX_SGE || attrs->srq_limit > >> attrs->max_wr) { >>> + rv = -EINVAL; >>> + goto err_out; >>> + } >>> + >>> + srq = kzalloc(sizeof(*srq), GFP_KERNEL); >>> + if (!srq) { >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + >>> + srq->max_sge = attrs->max_sge; >>> + srq->num_rqe = roundup_pow_of_two(attrs->max_wr); >>> + atomic_set(&srq->space, srq->num_rqe); >>> + >>> + srq->limit = attrs->srq_limit; >>> + if (srq->limit) >>> + srq->armed = 1; >>> + >>> + if (kernel_verbs) >>> + srq->recvq = vzalloc(srq->num_rqe * sizeof(struct siw_rqe)); >>> + else >>> + srq->recvq = vmalloc_user(srq->num_rqe * >>> + sizeof(struct siw_rqe)); >>> + >>> + if (srq->recvq == NULL) { >>> + rv = -ENOMEM; >>> + goto err_out; >>> + } >>> + if (kernel_verbs) { >>> + srq->kernel_verbs = 1; >>> + } else if (udata) { >>> + struct siw_uresp_create_srq uresp; >>> + struct siw_ucontext *ctx; >>> + >>> + memset(&uresp, 0, sizeof(uresp)); >>> + ctx = to_siw_ctx(base_pd->uobject->context); >>> + >>> + uresp.num_rqe = srq->num_rqe; >>> + uresp.srq_key = siw_insert_uobj(ctx, srq->recvq, >>> + srq->num_rqe * sizeof(struct siw_rqe)); >>> + >>> + if (uresp.srq_key > SIW_MAX_UOBJ_KEY) >>> + siw_dbg(sdev, "preparing mmap srq failed\n"); >>> + >>> + rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); >>> + if (rv) >>> + goto err_out; >>> + } >>> + srq->pd = pd; >>> + siw_pd_get(pd); >>> + >>> + spin_lock_init(&srq->lock); >>> + >>> + siw_dbg(sdev, "[SRQ 0x%p]: success\n", srq); >>> + >>> + return &srq->base_srq; >>> + >>> +err_out: >>> + if (srq) { >>> + if (srq->recvq) >>> + vfree(srq->recvq); >>> + kfree(srq); >>> + } >>> + atomic_dec(&sdev->num_srq); >>> + >>> + return ERR_PTR(rv); >>> +} >>> + >>> +/* >>> + * siw_modify_srq() >>> + * >>> + * Modify SRQ. 
The caller may resize SRQ and/or set/reset >> notification >>> + * limit and (re)arm IB_EVENT_SRQ_LIMIT_REACHED notification. >>> + * >>> + * NOTE: it is unclear if RDMA core allows for changing the >> MAX_SGE >>> + * parameter. siw_modify_srq() does not check the attrs->max_sge >> param. >>> + */ >>> +int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr >> *attrs, >>> + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) >>> +{ >>> + struct siw_srq *srq = to_siw_srq(base_srq); >>> + unsigned long flags; >>> + int rv = 0; >>> + >>> + spin_lock_irqsave(&srq->lock, flags); >>> + >>> + if (attr_mask & IB_SRQ_MAX_WR) { >>> + /* resize request not yet supported */ >>> + rv = -EOPNOTSUPP; >>> + goto out; >>> + } >>> + if (attr_mask & IB_SRQ_LIMIT) { >>> + if (attrs->srq_limit) { >>> + if (unlikely(attrs->srq_limit > srq->num_rqe)) { >>> + rv = -EINVAL; >>> + goto out; >>> + } >>> + srq->armed = 1; >>> + } else >>> + srq->armed = 0; >>> + >>> + srq->limit = attrs->srq_limit; >>> + } >>> +out: >>> + spin_unlock_irqrestore(&srq->lock, flags); >>> + >>> + return rv; >>> +} >>> + >>> +/* >>> + * siw_query_srq() >>> + * >>> + * Query SRQ attributes. >>> + */ >>> +int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr >> *attrs) >>> +{ >>> + struct siw_srq *srq = to_siw_srq(base_srq); >>> + unsigned long flags; >>> + >>> + spin_lock_irqsave(&srq->lock, flags); >>> + >>> + attrs->max_wr = srq->num_rqe; >>> + attrs->max_sge = srq->max_sge; >>> + attrs->srq_limit = srq->limit; >>> + >>> + spin_unlock_irqrestore(&srq->lock, flags); >>> + >>> + return 0; >>> +} >>> + >>> +/* >>> + * siw_destroy_srq() >>> + * >>> + * Destroy SRQ. >>> + * It is assumed that the SRQ is not referenced by any >>> + * QP anymore - the code trusts the RDMA core environment to keep >> track >>> + * of QP references. >>> + */ >>> +int siw_destroy_srq(struct ib_srq *base_srq) >>> +{ >>> + struct siw_srq *srq = to_siw_srq(base_srq); >>> + struct siw_device *sdev = srq->pd->hdr.sdev; >>> + >>> + siw_pd_put(srq->pd); >>> + >>> + vfree(srq->recvq); >>> + kfree(srq); >>> + >>> + atomic_dec(&sdev->num_srq); >>> + >>> + return 0; >>> +} >>> + >>> +/* >>> + * siw_post_srq_recv() >>> + * >>> + * Post a list of receive queue elements to SRQ. >>> + * NOTE: The function does not check or lock a certain SRQ state >>> + * during the post operation. The code simply trusts the >>> + * RDMA core environment. >>> + * >>> + * @base_srq: Base SRQ contained in siw SRQ >>> + * @wr: List of R-WR's >>> + * @bad_wr: Updated to failing WR if posting fails. >>> + */ >>> +int siw_post_srq_recv(struct ib_srq *base_srq, const struct >> ib_recv_wr *wr, >>> + const struct ib_recv_wr **bad_wr) >>> +{ >>> + struct siw_srq *srq = to_siw_srq(base_srq); >>> + unsigned long flags; >>> + int rv = 0; >>> + >>> + if (!srq->kernel_verbs) { >>> + siw_dbg(srq->pd->hdr.sdev, >>> + "[SRQ 0x%p]: no kernel post_recv for mapped srq\n", >>> + srq); >>> + rv = -EINVAL; >>> + goto out; >>> + } >>> + /* >>> + * Serialize potentially multiple producers. >>> + * Not needed for single threaded consumer side. 
>>> + */ >>> + spin_lock_irqsave(&srq->lock, flags); >>> + >>> + while (wr) { >>> + u32 idx = srq->rq_put % srq->num_rqe; >>> + struct siw_rqe *rqe = &srq->recvq[idx]; >>> + >>> + if (rqe->flags) { >>> + siw_dbg(srq->pd->hdr.sdev, "SRQ full\n"); >>> + rv = -ENOMEM; >>> + break; >>> + } >>> + if (wr->num_sge > srq->max_sge) { >>> + siw_dbg(srq->pd->hdr.sdev, >>> + "[SRQ 0x%p]: too many sge's: %d\n", >>> + srq, wr->num_sge); >>> + rv = -EINVAL; >>> + break; >>> + } >>> + rqe->id = wr->wr_id; >>> + rqe->num_sge = wr->num_sge; >>> + siw_copy_sgl(wr->sg_list, rqe->sge, wr->num_sge); >>> + >>> + /* Make sure S-RQE is completely written before valid */ >>> + smp_wmb(); >>> + >>> + rqe->flags = SIW_WQE_VALID; >>> + >>> + srq->rq_put++; >>> + wr = wr->next; >>> + } >>> + spin_unlock_irqrestore(&srq->lock, flags); >>> +out: >>> + if (unlikely(rv < 0)) { >>> + siw_dbg(srq->pd->hdr.sdev, "[SRQ 0x%p]: error %d\n", srq, rv); >>> + *bad_wr = wr; >>> + } >>> + return rv; >>> +} >>> diff --git a/drivers/infiniband/sw/siw/siw_verbs.h >> b/drivers/infiniband/sw/siw/siw_verbs.h >>> new file mode 100644 >>> index 000000000000..5e108d98280c >>> --- /dev/null >>> +++ b/drivers/infiniband/sw/siw/siw_verbs.h >>> @@ -0,0 +1,114 @@ >>> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause >>> +/* >>> + * Software iWARP device driver >>> + * >>> + * Authors: Bernard Metzler <bmt@xxxxxxxxxxxxxx> >>> + * >>> + * Copyright (c) 2008-2019, IBM Corporation >>> + * >>> + * This software is available to you under a choice of one of two >>> + * licenses. You may choose to be licensed under the terms of the >> GNU >>> + * General Public License (GPL) Version 2, available from the file >>> + * COPYING in the main directory of this source tree, or the >>> + * BSD license below: >>> + * >>> + * Redistribution and use in source and binary forms, with or >>> + * without modification, are permitted provided that the >> following >>> + * conditions are met: >>> + * >>> + * - Redistributions of source code must retain the above >> copyright notice, >>> + * this list of conditions and the following disclaimer. >>> + * >>> + * - Redistributions in binary form must reproduce the above >> copyright >>> + * notice, this list of conditions and the following >> disclaimer in the >>> + * documentation and/or other materials provided with the >> distribution. >>> + * >>> + * - Neither the name of IBM nor the names of its contributors >> may be >>> + * used to endorse or promote products derived from this >> software without >>> + * specific prior written permission. >>> + * >>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >>> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES >> OF >>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND >>> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT >> HOLDERS >>> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN >> AN >>> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR >> IN >>> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >> THE >>> + * SOFTWARE. 
>>> + */ >>> + >>> +#ifndef _SIW_VERBS_H >>> +#define _SIW_VERBS_H >>> + >>> +#include <linux/errno.h> >>> + >>> +#include <rdma/iw_cm.h> >>> +#include <rdma/ib_verbs.h> >>> +#include <rdma/ib_smi.h> >>> +#include <rdma/ib_user_verbs.h> >>> + >>> +#include "siw.h" >>> +#include "siw_cm.h" >>> + >>> + >>> +extern struct ib_ucontext *siw_alloc_ucontext(struct ib_device >> *ibdev, >>> + struct ib_udata *udata); >>> +extern int siw_dealloc_ucontext(struct ib_ucontext *ucontext); >>> +extern int siw_query_port(struct ib_device *ibdev, u8 port, >>> + struct ib_port_attr *attr); >>> +extern int siw_get_port_immutable(struct ib_device *ibdev, u8 >> port, >>> + struct ib_port_immutable *port_imm); >>> +extern int siw_query_device(struct ib_device *ibdev, >>> + struct ib_device_attr *attr, >>> + struct ib_udata *udata); >>> +extern struct ib_cq *siw_create_cq(struct ib_device *ibdev, >>> + const struct ib_cq_init_attr *attr, >>> + struct ib_ucontext *ucontext, >>> + struct ib_udata *udata); >>> +extern int siw_query_port(struct ib_device *ibdev, u8 port, >>> + struct ib_port_attr *attr); >>> +extern int siw_query_pkey(struct ib_device *ibdev, u8 port, >>> + u16 idx, u16 *pkey); >>> +extern int siw_query_gid(struct ib_device *ibdev, u8 port, int >> idx, >>> + union ib_gid *gid); >>> +extern int siw_alloc_pd(struct ib_pd *base_pd, struct ib_ucontext >> *context, >>> + struct ib_udata *udata); >>> +extern void siw_dealloc_pd(struct ib_pd *pd); >>> +extern struct ib_qp *siw_create_qp(struct ib_pd *pd, >>> + struct ib_qp_init_attr *attr, >>> + struct ib_udata *udata); >>> +extern int siw_query_qp(struct ib_qp *ofa_qp, struct ib_qp_attr >> *qp_attr, >>> + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); >>> +extern int siw_verbs_modify_qp(struct ib_qp *ibqp, struct >> ib_qp_attr *attr, >>> + int attr_mask, struct ib_udata *udata); >>> +extern int siw_destroy_qp(struct ib_qp *ibqp); >>> +extern int siw_post_send(struct ib_qp *ibqp, const struct >> ib_send_wr *wr, >>> + const struct ib_send_wr **bad_wr); >>> +extern int siw_post_receive(struct ib_qp *ibqp, const struct >> ib_recv_wr *wr, >>> + const struct ib_recv_wr **bad_wr); >>> +extern int siw_destroy_cq(struct ib_cq *ibcq); >>> +extern int siw_poll_cq(struct ib_cq *ibcq, int num_entries, struct >> ib_wc *wc); >>> +extern int siw_req_notify_cq(struct ib_cq *ibcq, enum >> ib_cq_notify_flags flags); >>> +extern struct ib_mr *siw_reg_user_mr(struct ib_pd *ibpd, u64 >> start, u64 len, >>> + u64 rnic_va, int rights, >>> + struct ib_udata *udata); >>> +extern struct ib_mr *siw_alloc_mr(struct ib_pd *ibpd, enum >> ib_mr_type mr_type, >>> + u32 max_sge); >>> +extern struct ib_mr *siw_get_dma_mr(struct ib_pd *ibpd, int >> rights); >>> +extern int siw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist >> *sl, >>> + int num_sle, unsigned int *sg_off); >>> +extern int siw_dereg_mr(struct ib_mr *ibmr); >>> +extern struct ib_srq *siw_create_srq(struct ib_pd *ibpd, >>> + struct ib_srq_init_attr *attr, >>> + struct ib_udata *udata); >>> +extern int siw_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr >> *attr, >>> + enum ib_srq_attr_mask mask, struct ib_udata *udata); >>> +extern int siw_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr >> *attr); >>> +extern int siw_destroy_srq(struct ib_srq *ibsrq); >>> +extern int siw_post_srq_recv(struct ib_srq *ibsrq, const struct >> ib_recv_wr *wr, >>> + const struct ib_recv_wr **bad_wr); >>> +extern int siw_mmap(struct ib_ucontext *ibctx, struct >> vm_area_struct *vma); >> >> Not a big deal, but most of the functions 
here use different >> arguments names >> than the ones in the C file. >> > > yes, that's ugly. will change that. > >>> + >>> +extern const struct dma_map_ops siw_dma_generic_ops; >>> + >>> +#endif >>> diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h >> b/include/uapi/rdma/rdma_user_ioctl_cmds.h >>> index 06c34d99be85..a5cb2af9b829 100644 >>> --- a/include/uapi/rdma/rdma_user_ioctl_cmds.h >>> +++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h >>> @@ -102,6 +102,7 @@ enum rdma_driver_id { >>> RDMA_DRIVER_RXE, >>> RDMA_DRIVER_HFI1, >>> RDMA_DRIVER_QIB, >>> + RDMA_DRIVER_SIW >>> }; >>> >>> #endif >>> diff --git a/include/uapi/rdma/siw_user.h >> b/include/uapi/rdma/siw_user.h >>> new file mode 100644 >>> index 000000000000..6300a10e809d >>> --- /dev/null >>> +++ b/include/uapi/rdma/siw_user.h >>> @@ -0,0 +1,223 @@ >>> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause >>> +/* >>> + * Software iWARP device driver for Linux >>> + * >>> + * Authors: Bernard Metzler <bmt@xxxxxxxxxxxxxx> >>> + * >>> + * Copyright (c) 2008-2017, IBM Corporation >>> + * >>> + * This software is available to you under a choice of one of two >>> + * licenses. You may choose to be licensed under the terms of the >> GNU >>> + * General Public License (GPL) Version 2, available from the file >>> + * COPYING in the main directory of this source tree, or the >>> + * BSD license below: >>> + * >>> + * Redistribution and use in source and binary forms, with or >>> + * without modification, are permitted provided that the >> following >>> + * conditions are met: >>> + * >>> + * - Redistributions of source code must retain the above >> copyright notice, >>> + * this list of conditions and the following disclaimer. >>> + * >>> + * - Redistributions in binary form must reproduce the above >> copyright >>> + * notice, this list of conditions and the following >> disclaimer in the >>> + * documentation and/or other materials provided with the >> distribution. >>> + * >>> + * - Neither the name of IBM nor the names of its contributors >> may be >>> + * used to endorse or promote products derived from this >> software without >>> + * specific prior written permission. >>> + * >>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >>> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES >> OF >>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND >>> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT >> HOLDERS >>> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN >> AN >>> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR >> IN >>> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >> THE >>> + * SOFTWARE. 
>>> + */ >>> + >>> +#ifndef _SIW_USER_H >>> +#define _SIW_USER_H >>> + >>> +#include <linux/types.h> >>> + >>> +/*Common string that is matched to accept the device by the user >> library*/ >>> +#define SIW_NODE_DESC_COMMON "Software iWARP stack" >>> + >>> +#define SIW_IBDEV_PREFIX "siw_" >>> + >>> +#define VERSION_ID_SOFTIWARP 2 >>> + >>> +#define SIW_MAX_SGE 6 >>> +#define SIW_MAX_UOBJ_KEY 0xffffff >>> +#define SIW_INVAL_UOBJ_KEY (SIW_MAX_UOBJ_KEY + 1) >>> + >>> +struct siw_uresp_create_cq { >>> + __u32 cq_id; >>> + __u32 num_cqe; >>> + __u32 cq_key; >>> + __u32 pad; >>> +}; >>> + >>> +struct siw_uresp_create_qp { >>> + __u32 qp_id; >>> + __u32 num_sqe; >>> + __u32 num_rqe; >>> + __u32 sq_key; >>> + __u32 rq_key; >>> + __u32 pad; >>> +}; >>> + >>> +struct siw_ureq_reg_mr { >>> + __u8 stag_key; >>> + __u8 reserved[3]; >>> + __u32 pad; >>> +}; >>> + >>> +struct siw_uresp_reg_mr { >>> + __u32 stag; >>> + __u32 pad; >>> +}; >>> + >>> +struct siw_uresp_create_srq { >>> + __u32 num_rqe; >>> + __u32 srq_key; >>> +}; >>> + >>> +struct siw_uresp_alloc_ctx { >>> + __u32 dev_id; >>> + __u32 pad; >>> +}; >>> + >>> +enum siw_opcode { >>> + SIW_OP_WRITE = 0, >>> + SIW_OP_READ = 1, >>> + SIW_OP_READ_LOCAL_INV = 2, >>> + SIW_OP_SEND = 3, >>> + SIW_OP_SEND_WITH_IMM = 4, >>> + SIW_OP_SEND_REMOTE_INV = 5, >>> + >>> + /* Unsupported */ >>> + SIW_OP_FETCH_AND_ADD = 6, >>> + SIW_OP_COMP_AND_SWAP = 7, >>> + >>> + SIW_OP_RECEIVE = 8, >>> + /* provider internal SQE */ >>> + SIW_OP_READ_RESPONSE = 9, >>> + /* >>> + * below opcodes valid for >>> + * in-kernel clients only >>> + */ >>> + SIW_OP_INVAL_STAG = 10, >>> + SIW_OP_REG_MR = 11, >>> + SIW_NUM_OPCODES = 12 >>> +}; >>> + >>> +/* Keep it same as ibv_sge to allow for memcpy */ >>> +struct siw_sge { >>> + __aligned_u64 laddr; >>> + __u32 length; >>> + __u32 lkey; >>> +}; >>> + >>> +/* >>> + * Inline data are kept within the work request itself occupying >>> + * the space of sge[1] .. sge[n]. Therefore, inline data cannot be >>> + * supported if SIW_MAX_SGE is below 2 elements. 
>>> + */ >>> +#define SIW_MAX_INLINE (sizeof(struct siw_sge) * (SIW_MAX_SGE - >> 1)) >>> + >>> +#if SIW_MAX_SGE < 2 >>> +#error "SIW_MAX_SGE must be at least 2" >>> +#endif >>> + >>> +enum siw_wqe_flags { >>> + SIW_WQE_VALID = 1, >>> + SIW_WQE_INLINE = (1 << 1), >>> + SIW_WQE_SIGNALLED = (1 << 2), >>> + SIW_WQE_SOLICITED = (1 << 3), >>> + SIW_WQE_READ_FENCE = (1 << 4), >>> + SIW_WQE_COMPLETED = (1 << 5) >>> +}; >>> + >>> +/* Send Queue Element */ >>> +struct siw_sqe { >>> + __aligned_u64 id; >>> + __u16 flags; >>> + __u8 num_sge; >>> + /* Contains enum siw_opcode values */ >>> + __u8 opcode; >>> + __u32 rkey; >>> + union { >>> + __aligned_u64 raddr; >>> + __aligned_u64 base_mr; >>> + }; >>> + union { >>> + struct siw_sge sge[SIW_MAX_SGE]; >>> + __aligned_u64 access; >>> + }; >>> +}; >>> + >>> +/* Receive Queue Element */ >>> +struct siw_rqe { >>> + __aligned_u64 id; >>> + __u16 flags; >>> + __u8 num_sge; >>> + /* >>> + * only used by kernel driver, >>> + * ignored if set by user >>> + */ >>> + __u8 opcode; >>> + __u32 unused; >>> + struct siw_sge sge[SIW_MAX_SGE]; >>> +}; >>> + >>> +enum siw_notify_flags { >>> + SIW_NOTIFY_NOT = (0), >>> + SIW_NOTIFY_SOLICITED = (1 << 0), >>> + SIW_NOTIFY_NEXT_COMPLETION = (1 << 1), >>> + SIW_NOTIFY_MISSED_EVENTS = (1 << 2), >>> + SIW_NOTIFY_ALL = SIW_NOTIFY_SOLICITED | >>> + SIW_NOTIFY_NEXT_COMPLETION | >>> + SIW_NOTIFY_MISSED_EVENTS >>> +}; >>> + >>> +enum siw_wc_status { >>> + SIW_WC_SUCCESS = 0, >>> + SIW_WC_LOC_LEN_ERR = 1, >>> + SIW_WC_LOC_PROT_ERR = 2, >>> + SIW_WC_LOC_QP_OP_ERR = 3, >>> + SIW_WC_WR_FLUSH_ERR = 4, >>> + SIW_WC_BAD_RESP_ERR = 5, >>> + SIW_WC_LOC_ACCESS_ERR = 6, >>> + SIW_WC_REM_ACCESS_ERR = 7, >>> + SIW_WC_REM_INV_REQ_ERR = 8, >>> + SIW_WC_GENERAL_ERR = 9, >>> + SIW_NUM_WC_STATUS = 10 >>> +}; >>> + >>> +struct siw_qp; >>> + >>> +struct siw_cqe { >>> + __aligned_u64 id; >>> + __u8 flags; >>> + __u8 opcode; >>> + __u16 status; >>> + __u32 bytes; >>> + __aligned_u64 imm_data; >>> + /* QP number or QP pointer */ >>> + union { >>> + struct siw_qp *qp; >>> + __aligned_u64 qp_id; >>> + }; >>> +}; >>> + >>> +/* >>> + * Shared structure between user and kernel >>> + * to control CQ arming. >>> + */ >>> +struct siw_cq_ctrl { >>> + __aligned_u64 notify; >>> +}; >>> +#endif >>> >> >> Reviewed-by: Gal Pressman <galpress@xxxxxxxxxx> >> >> >
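
The patch above implements the kernel side of the SRQ verbs (siw_create_srq(), siw_modify_srq(), siw_query_srq(), siw_post_srq_recv()). For orientation, the following is a minimal sketch of how an application would drive that path from user space through the generic libibverbs API: create an SRQ inside a PD, arm the SRQ limit so an IB_EVENT_SRQ_LIMIT_REACHED event can fire, and post one receive. It is not part of the patch and not siw-specific code; the choice of device (the first one enumerated), the queue sizes, and the shortened error handling are illustrative assumptions only.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **dev_list = ibv_get_device_list(NULL);
	if (!dev_list || !dev_list[0])
		return 1;

	/* Assumes the first enumerated device is the siw device of interest. */
	struct ibv_context *ctx = ibv_open_device(dev_list[0]);
	if (!ctx)
		return 1;
	struct ibv_pd *pd = ibv_alloc_pd(ctx);
	if (!pd)
		return 1;

	/* Limits must satisfy the checks in siw_create_srq():
	 * 0 < max_wr <= SIW_MAX_SRQ_WR, max_sge <= SIW_MAX_SGE,
	 * srq_limit <= max_wr. */
	struct ibv_srq_init_attr init_attr = {
		.attr = { .max_wr = 256, .max_sge = 2, .srq_limit = 0 },
	};
	struct ibv_srq *srq = ibv_create_srq(pd, &init_attr);
	if (!srq)
		return 1;

	/* Re-arm the limit event handled by siw_modify_srq(). */
	struct ibv_srq_attr attr = { .srq_limit = 16 };
	if (ibv_modify_srq(srq, &attr, IBV_SRQ_LIMIT))
		perror("ibv_modify_srq");

	/* Post a single receive buffer to the shared queue. */
	char *buf = malloc(4096);
	if (!buf)
		return 1;
	struct ibv_mr *mr = ibv_reg_mr(pd, buf, 4096, IBV_ACCESS_LOCAL_WRITE);
	if (!mr)
		return 1;

	struct ibv_sge sge = {
		.addr = (uintptr_t)buf,
		.length = 4096,
		.lkey = mr->lkey,
	};
	struct ibv_recv_wr wr = { .wr_id = 1, .sg_list = &sge, .num_sge = 1 };
	struct ibv_recv_wr *bad_wr;

	if (ibv_post_srq_recv(srq, &wr, &bad_wr))
		perror("ibv_post_srq_recv");

	ibv_dereg_mr(mr);
	free(buf);
	ibv_destroy_srq(srq);
	ibv_dealloc_pd(pd);
	ibv_close_device(ctx);
	ibv_free_device_list(dev_list);
	return 0;
}

Note that for a user-mapped SRQ the quoted siw_post_srq_recv() rejects kernel-style posts ("no kernel post_recv for mapped srq"); in that case the user provider writes the mmapped receive queue directly, as the next sketch illustrates.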
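
On the user-space fast path the receive queue is therefore not posted through a system call at all: siw_create_srq() exports the recvq array of struct siw_rqe via the srq_key/mmap mechanism, and the provider fills entries in place. The kernel consumer depends on SIW_WQE_VALID being written after the rest of the element, the same ordering the smp_wmb() in siw_post_srq_recv() enforces for kernel producers. The sketch below shows that producer-side ordering using only the uapi structures from siw_user.h. The srq_ring and srq_post_one names are invented for illustration, the include assumes installed kernel uapi headers, a GCC/Clang __atomic release store stands in for the provider's write barrier, and the real rdma-core siw provider may do this differently.

#include <string.h>
#include <rdma/siw_user.h>   /* struct siw_rqe, struct siw_sge, SIW_WQE_VALID */

/* Invented for this sketch: the provider's view of one mmapped SRQ ring. */
struct srq_ring {
	struct siw_rqe *recvq;   /* mapped via the srq_key from siw_uresp_create_srq */
	unsigned int num_rqe;    /* ring size, a power of two per siw_create_srq() */
	unsigned int rq_put;     /* producer index, private to user space */
};

/* Fill one RQE and publish it; returns 0 on success, -1 on full/invalid. */
static int srq_post_one(struct srq_ring *ring, __u64 wr_id,
			const struct siw_sge *sge, __u8 num_sge)
{
	struct siw_rqe *rqe = &ring->recvq[ring->rq_put % ring->num_rqe];

	if (num_sge > SIW_MAX_SGE)
		return -1;
	/* A non-zero flags word means the slot is still owned by the
	 * consumer; siw_post_srq_recv() uses the same test for "SRQ full". */
	if (rqe->flags)
		return -1;

	rqe->id = wr_id;
	rqe->num_sge = num_sge;
	memcpy(rqe->sge, sge, num_sge * sizeof(*sge));

	/* Publish last: SIW_WQE_VALID must not become visible before the
	 * payload, mirroring the smp_wmb() on the kernel producer side. */
	__atomic_store_n(&rqe->flags, (__u16)SIW_WQE_VALID, __ATOMIC_RELEASE);

	ring->rq_put++;
	return 0;
}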