-----"Leon Romanovsky" <leon@xxxxxxxxxx> wrote: ----- >To: "Bernard Metzler" <bmt@xxxxxxxxxxxxxx> >From: "Leon Romanovsky" <leon@xxxxxxxxxx> >Date: 02/24/2019 02:17PM >Cc: linux-rdma@xxxxxxxxxxxxxxx >Subject: Re: [PATCH v5 04/13] SIW object management > >On Tue, Feb 19, 2019 at 11:08:54AM +0100, Bernard Metzler wrote: >> Signed-off-by: Bernard Metzler <bmt@xxxxxxxxxxxxxx> >> --- >> drivers/infiniband/sw/siw/siw_obj.c | 341 >++++++++++++++++++++++++++++ >> drivers/infiniband/sw/siw/siw_obj.h | 226 ++++++++++++++++++ >> 2 files changed, 567 insertions(+) >> create mode 100644 drivers/infiniband/sw/siw/siw_obj.c >> create mode 100644 drivers/infiniband/sw/siw/siw_obj.h >> >> diff --git a/drivers/infiniband/sw/siw/siw_obj.c >b/drivers/infiniband/sw/siw/siw_obj.c >> new file mode 100644 >> index 000000000000..a086a3c9aca7 >> --- /dev/null >> +++ b/drivers/infiniband/sw/siw/siw_obj.c >> @@ -0,0 +1,341 @@ >> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause >> +/* >> + * Software iWARP device driver >> + * >> + * Authors: Bernard Metzler <bmt@xxxxxxxxxxxxxx> >> + * >> + * Copyright (c) 2008-2018, IBM Corporation >> + * >> + * This software is available to you under a choice of one of two >> + * licenses. You may choose to be licensed under the terms of the >GNU >> + * General Public License (GPL) Version 2, available from the file >> + * COPYING in the main directory of this source tree, or the >> + * BSD license below: >> + * >> + * Redistribution and use in source and binary forms, with or >> + * without modification, are permitted provided that the >following >> + * conditions are met: >> + * >> + * - Redistributions of source code must retain the above >copyright notice, >> + * this list of conditions and the following disclaimer. >> + * >> + * - Redistributions in binary form must reproduce the above >copyright >> + * notice, this list of conditions and the following >disclaimer in the >> + * documentation and/or other materials provided with the >distribution. >> + * >> + * - Neither the name of IBM nor the names of its contributors >may be >> + * used to endorse or promote products derived from this >software without >> + * specific prior written permission. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES >OF >> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND >> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT >HOLDERS >> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN >AN >> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR >IN >> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >THE >> + * SOFTWARE. >> + */ >> + >> +#include <linux/spinlock.h> >> +#include <linux/kref.h> >> +#include <linux/vmalloc.h> >> + >> +#include "siw.h" >> +#include "siw_obj.h" >> +#include "siw_cm.h" >> + >> +void siw_objhdr_init(struct siw_objhdr *hdr) >> +{ >> + kref_init(&hdr->ref); >> +} >> + >> +void siw_idr_init(struct siw_device *sdev) >> +{ >> + spin_lock_init(&sdev->lock); >> + >> + idr_init(&sdev->qp_idr); >> + idr_init(&sdev->cq_idr); >> + idr_init(&sdev->pd_idr); >> + idr_init(&sdev->mem_idr); >> +} >> + >> +void siw_idr_release(struct siw_device *sdev) >> +{ >> + idr_destroy(&sdev->qp_idr); >> + idr_destroy(&sdev->cq_idr); >> + idr_destroy(&sdev->pd_idr); >> + idr_destroy(&sdev->mem_idr); >> +} >> + >> +static int siw_add_obj(spinlock_t *lock, struct idr *idr, >> + struct siw_objhdr *obj) >> +{ >> + unsigned long flags; >> + int id, min_id; >> + >> + get_random_bytes(&min_id, 4); >> + min_id &= 0x00FFFFFF; >> +again: >> + spin_lock_irqsave(lock, flags); >> + id = idr_alloc(idr, obj, min_id, 0x00FFFFFF - 1, GFP_KERNEL); >> + spin_unlock_irqrestore(lock, flags); >> + >> + if (id > 0) { >> + siw_objhdr_init(obj); >> + obj->id = id; >> + } else if (id == -ENOSPC && min_id != 1) { >> + min_id = 1; >> + goto again; >> + } else { >> + pr_warn("SIW: object ID space\n"); >> + } >> + if (id > 0) >> + return 0; >> + >> + return id == 0 ? -ENOSPC : id; >> +} >> + >> +int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp) >> +{ >> + int rv = siw_add_obj(&sdev->lock, &sdev->qp_idr, &qp->hdr); >> + >> + if (!rv) { >> + qp->hdr.sdev = sdev; >> + siw_dbg_obj(qp, "new QP\n"); >> + } >> + return rv; >> +} >> + >> +int siw_cq_add(struct siw_device *sdev, struct siw_cq *cq) >> +{ >> + int rv = siw_add_obj(&sdev->lock, &sdev->cq_idr, &cq->hdr); >> + >> + if (!rv) { >> + cq->hdr.sdev = sdev; >> + siw_dbg_obj(cq, "new CQ\n"); >> + } >> + return rv; >> +} >> + >> +int siw_pd_add(struct siw_device *sdev, struct siw_pd *pd) >> +{ >> + int rv = siw_add_obj(&sdev->lock, &sdev->pd_idr, &pd->hdr); >> + >> + if (!rv) { >> + pd->hdr.sdev = sdev; >> + siw_dbg_obj(pd, "new PD\n"); >> + } >> + return rv; >> +} >> + >> +/* >> + * Stag lookup is based on its index part only (24 bits). >> + * The code avoids special Stag of zero and tries to randomize >> + * STag values between 1 and SIW_STAG_MAX_INDEX. >> + */ >> +int siw_mem_add(struct siw_device *sdev, struct siw_mem *m) >> +{ >> + unsigned long flags; >> + int id, min_id, max_id = 0x00FFFFFF; >> + >> + do { >> + get_random_bytes(&min_id, 4); >> + min_id &= 0x00FFFFFF; >> + } while (min_id <= 0); >> +again: >> + spin_lock_irqsave(&sdev->lock, flags); >> + id = idr_alloc(&sdev->mem_idr, m, min_id, max_id, GFP_KERNEL); >> + spin_unlock_irqrestore(&sdev->lock, flags); >> + >> + if (id == -ENOMEM) >> + return -ENOMEM; >> + >> + if (id == -ENOSPC) { >> + max_id = min_id; >> + min_id /= 2; >> + if (min_id <= 0) { >> + pr_warn("SIW: memory ID space\n"); >> + return -ENOSPC; >> + } >> + goto again; >> + } >> + siw_objhdr_init(&m->hdr); >> + m->hdr.id = id; >> + m->hdr.sdev = sdev; >> + >> + siw_dbg_obj(m, "new MEM object\n"); >> + >> + return 0; >> +} >> + >> +void siw_remove_obj(spinlock_t *lock, struct idr *idr, >> + struct siw_objhdr *hdr) >> +{ >> + unsigned long flags; >> + >> + spin_lock_irqsave(lock, flags); >> + idr_remove(idr, hdr->id); >> + spin_unlock_irqrestore(lock, flags); >> +} >> + >> +/********** routines to put objs back and free if no ref left >*****/ >> + >> +void siw_free_cq(struct kref *ref) >> +{ >> + struct siw_cq *cq = >> + (container_of(container_of(ref, struct siw_objhdr, ref), >> + struct siw_cq, hdr)); >> + >> + siw_dbg_obj(cq, "free cq\n"); >> + >> + atomic_dec(&cq->hdr.sdev->num_cq); >> + if (cq->queue) >> + vfree(cq->queue); >> + kfree(cq); >> +} >> + >> +void siw_free_qp(struct kref *ref) >> +{ >> + struct siw_qp *qp = >> + container_of(container_of(ref, struct siw_objhdr, ref), >> + struct siw_qp, hdr); >> + struct siw_device *sdev = qp->hdr.sdev; >> + unsigned long flags; >> + >> + siw_dbg_obj(qp, "free qp\n"); >> + >> + if (qp->cep) >> + siw_cep_put(qp->cep); >> + >> + siw_remove_obj(&sdev->lock, &sdev->qp_idr, &qp->hdr); >> + >> + spin_lock_irqsave(&sdev->lock, flags); >> + list_del(&qp->devq); >> + spin_unlock_irqrestore(&sdev->lock, flags); >> + >> + if (qp->sendq) >> + vfree(qp->sendq); >> + if (qp->recvq) >> + vfree(qp->recvq); >> + if (qp->irq) >> + vfree(qp->irq); >> + if (qp->orq) >> + vfree(qp->orq); >> + >> + siw_put_tx_cpu(qp->tx_cpu); >> + >> + atomic_dec(&sdev->num_qp); >> + kfree(qp); >> +} >> + >> +void siw_free_pd(struct kref *ref) >> +{ >> + struct siw_pd *pd = >> + container_of(container_of(ref, struct siw_objhdr, ref), >> + struct siw_pd, hdr); >> + >> + siw_dbg_obj(pd, "free PD\n"); >> + >> + atomic_dec(&pd->hdr.sdev->num_pd); >> +} >> + >> +void siw_free_mem(struct kref *ref) >> +{ >> + struct siw_mem *m; >> + struct siw_device *sdev; >> + >> + m = container_of(container_of(ref, struct siw_objhdr, ref), >> + struct siw_mem, hdr); >> + sdev = m->hdr.sdev; >> + >> + siw_dbg_obj(m, "free mem\n"); >> + >> + atomic_dec(&sdev->num_mr); >> + >> + if (SIW_MEM_IS_MW(m)) { >> + struct siw_mw *mw = container_of(m, struct siw_mw, mem); >> + >> + kfree_rcu(mw, rcu); >> + } else { >> + struct siw_mr *mr = container_of(m, struct siw_mr, mem); >> + unsigned long flags; >> + >> + siw_dbg(m->hdr.sdev, "[MEM %d]: has pbl: %s\n", >> + OBJ_ID(m), mr->mem.is_pbl ? "y" : "n"); >> + >> + if (mr->pd) >> + siw_pd_put(mr->pd); >> + >> + if (mr->mem_obj) { >> + if (mr->mem.is_pbl == 0) >> + siw_umem_release(mr->umem); >> + else >> + siw_pbl_free(mr->pbl); >> + >> + } >> + spin_lock_irqsave(&sdev->lock, flags); >> + list_del(&mr->devq); >> + spin_unlock_irqrestore(&sdev->lock, flags); >> + >> + kfree_rcu(mr, rcu); >> + } >> +} >> + >> +void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op) >> +{ >> + switch (op) { >> + >> + case SIW_OP_SEND: >> + case SIW_OP_WRITE: >> + case SIW_OP_SEND_WITH_IMM: >> + case SIW_OP_SEND_REMOTE_INV: >> + case SIW_OP_READ: >> + case SIW_OP_READ_LOCAL_INV: >> + if (!(wqe->sqe.flags & SIW_WQE_INLINE)) >> + siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge); >> + break; >> + >> + case SIW_OP_RECEIVE: >> + siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge); >> + break; >> + >> + case SIW_OP_READ_RESPONSE: >> + siw_unref_mem_sgl(wqe->mem, 1); >> + break; >> + >> + default: >> + /* >> + * SIW_OP_INVAL_STAG and SIW_OP_REG_MR >> + * do not hold memory references >> + */ >> + break; >> + } >> +} >> + >> +int siw_invalidate_stag(struct siw_pd *pd, u32 stag) >> +{ >> + u32 stag_idx = stag >> 8; >> + struct siw_mem *mem = siw_mem_id2obj(pd->hdr.sdev, stag_idx); >> + int rv = 0; >> + >> + if (unlikely(!mem)) { >> + siw_dbg(pd->hdr.sdev, "stag %u unknown\n", stag_idx); >> + return -EINVAL; >> + } >> + if (unlikely(siw_mem2mr(mem)->pd != pd)) { >> + siw_dbg(pd->hdr.sdev, "pd mismatch for stag %u\n", stag_idx); >> + rv = -EACCES; >> + goto out; >> + } >> + /* >> + * Per RDMA verbs definition, an STag may already be in invalid >> + * state if invalidation is requested. So no state check here. >> + */ >> + mem->stag_valid = 0; >> + >> + siw_dbg(pd->hdr.sdev, "stag %u now valid\n", stag_idx); >> +out: >> + siw_mem_put(mem); >> + return rv; >> +} >> diff --git a/drivers/infiniband/sw/siw/siw_obj.h >b/drivers/infiniband/sw/siw/siw_obj.h >> new file mode 100644 >> index 000000000000..ed0575c22283 >> --- /dev/null >> +++ b/drivers/infiniband/sw/siw/siw_obj.h >> @@ -0,0 +1,226 @@ >> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause >> +/* >> + * Software iWARP device driver >> + * >> + * Authors: Bernard Metzler <bmt@xxxxxxxxxxxxxx> >> + * >> + * Copyright (c) 2008-2018, IBM Corporation >> + * >> + * This software is available to you under a choice of one of two >> + * licenses. You may choose to be licensed under the terms of the >GNU >> + * General Public License (GPL) Version 2, available from the file >> + * COPYING in the main directory of this source tree, or the >> + * BSD license below: >> + * >> + * Redistribution and use in source and binary forms, with or >> + * without modification, are permitted provided that the >following >> + * conditions are met: >> + * >> + * - Redistributions of source code must retain the above >copyright notice, >> + * this list of conditions and the following disclaimer. >> + * >> + * - Redistributions in binary form must reproduce the above >copyright >> + * notice, this list of conditions and the following >disclaimer in the >> + * documentation and/or other materials provided with the >distribution. >> + * >> + * - Neither the name of IBM nor the names of its contributors >may be >> + * used to endorse or promote products derived from this >software without >> + * specific prior written permission. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES >OF >> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND >> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT >HOLDERS >> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN >AN >> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR >IN >> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >THE >> + * SOFTWARE. >> + */ >> + >> +#ifndef _SIW_OBJ_H >> +#define _SIW_OBJ_H >> + >> +#include <linux/idr.h> >> +#include <linux/rwsem.h> >> +#include <linux/version.h> >> +#include <linux/sched.h> >> +#include <linux/semaphore.h> >> + >> +#include <rdma/ib_verbs.h> >> + >> +#include "siw_debug.h" >> + >> +extern void siw_free_qp(struct kref *ref); >> +extern void siw_free_pd(struct kref *ref); >> +extern void siw_free_cq(struct kref *ref); >> +extern void siw_free_mem(struct kref *ref); >> + >> +static inline struct siw_ucontext *to_siw_ctx(struct ib_ucontext >*base_ctx) >> +{ >> + return container_of(base_ctx, struct siw_ucontext, ib_ucontext); >> +} >> + >> +static inline struct siw_qp *to_siw_qp(struct ib_qp *base_qp) >> +{ >> + return container_of(base_qp, struct siw_qp, base_qp); >> +} >> + >> +static inline struct siw_cq *to_siw_cq(struct ib_cq *base_cq) >> +{ >> + return container_of(base_cq, struct siw_cq, base_cq); >> +} >> + >> +static inline struct siw_srq *to_siw_srq(struct ib_srq *base_srq) >> +{ >> + return container_of(base_srq, struct siw_srq, base_srq); >> +} >> + >> +static inline struct siw_device *to_siw_dev(struct ib_device >*base_dev) >> +{ >> + return container_of(base_dev, struct siw_device, base_dev); >> +} >> + >> +static inline struct siw_mr *to_siw_mr(struct ib_mr *base_mr) >> +{ >> + return container_of(base_mr, struct siw_mr, base_mr); >> +} >> + >> +static inline struct siw_pd *to_siw_pd(struct ib_pd *base_pd) >> +{ >> + return container_of(base_pd, struct siw_pd, base_pd); >> +} >> + >> +static inline void siw_cq_get(struct siw_cq *cq) >> +{ >> + kref_get(&cq->hdr.ref); >> + siw_dbg_obj(cq, "new refcount: %d\n", kref_read(&cq->hdr.ref)); >> +} >> +static inline void siw_qp_get(struct siw_qp *qp) >> +{ >> + kref_get(&qp->hdr.ref); >> + siw_dbg_obj(qp, "new refcount: %d\n", kref_read(&qp->hdr.ref)); >> +} >> + >> +static inline void siw_pd_get(struct siw_pd *pd) >> +{ >> + kref_get(&pd->hdr.ref); >> + siw_dbg_obj(pd, "new refcount: %d\n", kref_read(&pd->hdr.ref)); >> +} >> + >> +static inline void siw_mem_get(struct siw_mem *mem) >> +{ >> + kref_get(&mem->hdr.ref); >> + siw_dbg_obj(mem, "new refcount: %d\n", kref_read(&mem->hdr.ref)); >> +} > >I think that all this kref code is not actually needed and IB/core >handles it. > I tend to agree for other objects than for user communication buffers. Memory regions must be protected by the provider during in-flight access operations such as RDMA Read's or Write's. The memory must not go away during those operations. I think same is true for associated objects like QP and PD - and it is cheaper to bump up a ref counter compared to taking locks. I agree, there is potential for simplification. I would like to have a closer look when the rdma core has been reshaped to explicitly control the lifetime of all other objects - as it is now already doing for PD's.