-----"Leon Romanovsky" <leon@xxxxxxxxxx> wrote: ----- >To: "Bernard Metzler" <bmt@xxxxxxxxxxxxxx> >From: "Leon Romanovsky" <leon@xxxxxxxxxx> >Date: 06/10/2019 07:40AM >Cc: linux-rdma@xxxxxxxxxxxxxxx >Subject: [EXTERNAL] Re: [PATCH for-next v1 03/12] SIW network and >RDMA core interface > >On Sun, May 26, 2019 at 01:41:47PM +0200, Bernard Metzler wrote: >> Signed-off-by: Bernard Metzler <bmt@xxxxxxxxxxxxxx> >> --- >> drivers/infiniband/sw/siw/siw_main.c | 701 >+++++++++++++++++++++++++++ >> 1 file changed, 701 insertions(+) >> create mode 100644 drivers/infiniband/sw/siw/siw_main.c >> >> diff --git a/drivers/infiniband/sw/siw/siw_main.c >b/drivers/infiniband/sw/siw/siw_main.c >> new file mode 100644 >> index 000000000000..a9b8a5d2aaa3 >> --- /dev/null >> +++ b/drivers/infiniband/sw/siw/siw_main.c >> @@ -0,0 +1,701 @@ >> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause >> + >> +/* Authors: Bernard Metzler <bmt@xxxxxxxxxxxxxx> */ >> +/* Copyright (c) 2008-2019, IBM Corporation */ >> + >> +#include <linux/init.h> >> +#include <linux/errno.h> >> +#include <linux/netdevice.h> >> +#include <linux/inetdevice.h> >> +#include <net/net_namespace.h> >> +#include <linux/rtnetlink.h> >> +#include <linux/if_arp.h> >> +#include <linux/list.h> >> +#include <linux/kernel.h> >> +#include <linux/dma-mapping.h> >> + >> +#include <rdma/ib_verbs.h> >> +#include <rdma/ib_smi.h> >> +#include <rdma/ib_user_verbs.h> >> +#include <rdma/rdma_netlink.h> >> +#include <linux/kthread.h> >> + >> +#include "siw.h" >> +#include "siw_cm.h" >> +#include "siw_verbs.h" >> +#include "siw_debug.h" >> + >> +MODULE_AUTHOR("Bernard Metzler"); >> +MODULE_DESCRIPTION("Software iWARP Driver"); >> +MODULE_LICENSE("Dual BSD/GPL"); >> + >> +/* transmit from user buffer, if possible */ >> +const bool zcopy_tx = true; >> + >> +/* Restrict usage of GSO, if hardware peer iwarp is unable to >process >> + * large packets. try_gso = true lets siw try to use local GSO, >> + * if peer agrees. Not using GSO severly limits siw maximum tx >bandwidth. >> + */ >> +const bool try_gso; >> + >> +/* Attach siw also with loopback devices */ >> +const bool loopback_enabled = true; >> + >> +/* We try to negotiate CRC on, if true */ >> +const bool mpa_crc_required; >> + >> +/* MPA CRC on/off enforced */ >> +const bool mpa_crc_strict; >> + >> +/* Control TCP_NODELAY socket option */ >> +const bool siw_tcp_nagle; >> + >> +/* Select MPA version to be used during connection setup */ >> +u_char mpa_version = MPA_REVISION_2; >> + >> +/* Selects MPA P2P mode (additional handshake during connection >> + * setup, if true. >> + */ >> +const bool peer_to_peer; >> + >> +struct task_struct *siw_tx_thread[NR_CPUS]; >> +struct crypto_shash *siw_crypto_shash; >> + >> +static int siw_device_register(struct siw_device *sdev, const char >*name) >> +{ >> + struct ib_device *base_dev = &sdev->base_dev; >> + static int dev_id = 1; >> + int rv; >> + >> + base_dev->driver_id = RDMA_DRIVER_SIW; >> + >> + rv = ib_register_device(base_dev, name); >> + if (rv) { >> + pr_warn("siw: device registration error %d\n", rv); >> + return rv; >> + } >> + sdev->vendor_part_id = dev_id++; >> + >> + siw_dbg(base_dev, "HWaddr=%pM\n", sdev->netdev->dev_addr); >> + >> + return 0; >> +} >> + >> +static void siw_device_cleanup(struct ib_device *base_dev) >> +{ >> + struct siw_device *sdev = to_siw_dev(base_dev); >> + >> + siw_dbg(base_dev, "Cleanup device\n"); >> + >> + if (atomic_read(&sdev->num_ctx) || atomic_read(&sdev->num_srq) || >> + atomic_read(&sdev->num_mr) || atomic_read(&sdev->num_cep) || >> + atomic_read(&sdev->num_qp) || atomic_read(&sdev->num_cq) || >> + atomic_read(&sdev->num_pd)) { >> + pr_warn("siw at %s: orphaned resources!\n", sdev->netdev->name); >> + pr_warn(" CTX %d, SRQ %d, QP %d, CQ %d, MEM %d, CEP >%d, PD %d\n", >> + atomic_read(&sdev->num_ctx), >> + atomic_read(&sdev->num_srq), atomic_read(&sdev->num_qp), >> + atomic_read(&sdev->num_cq), atomic_read(&sdev->num_mr), >> + atomic_read(&sdev->num_cep), >> + atomic_read(&sdev->num_pd)); >> + } > >We already talked about it, most of this code is redundant due to >restrack and it should be removed. yes. removed. > >> + while (!list_empty(&sdev->cep_list)) { >> + struct siw_cep *cep = >> + list_entry(sdev->cep_list.next, struct siw_cep, devq); >> + list_del(&cep->devq); >> + pr_warn("siw: at %s: free orphaned CEP 0x%p, state %d\n", >> + sdev->base_dev.name, cep, cep->state); > >Does it mean that SIW leak memory? If cep can be not-empty at this >stage, what will be the purpose of pr_warn? If it can't, it is better >to find memory leak. > Hmm, I would not offer that driver if I would be aware of a mem leak. Older versions of rdma midlayer did not reliably call reject in case an application got killed within the accept phase (new connection signaled but not yet accepted/rejected by application). siw ended up witha lingering connection endpoint which gets freed here. I re-checked and that issue got obviously fixed. So I remove that code as well. Thanks! >> + kfree(cep); >> + } >> + xa_destroy(&sdev->qp_xa); >> + xa_destroy(&sdev->mem_xa); >> +} >> + >> +static int siw_create_tx_threads(void) >> +{ >> + int cpu, rv, assigned = 0; >> + >> + for_each_online_cpu(cpu) { >> + /* Skip HT cores */ >> + if (cpu % cpumask_weight(topology_sibling_cpumask(cpu))) { >> + siw_tx_thread[cpu] = NULL; >> + continue; >> + } >> + siw_tx_thread[cpu] = >> + kthread_create(siw_run_sq, (unsigned long *)(long)cpu, >> + "siw_tx/%d", cpu); > >I don't decide here, but just for the record, creating kernel threads >for every CPU is wrong. > >> + if (IS_ERR(siw_tx_thread[cpu])) { >> + rv = PTR_ERR(siw_tx_thread[cpu]); >> + siw_tx_thread[cpu] = NULL; >> + pr_info("Creating TX thread for CPU %d failed", cpu); >> + continue; >> + } >> + kthread_bind(siw_tx_thread[cpu], cpu); >> + >> + wake_up_process(siw_tx_thread[cpu]); >> + assigned++; >> + } >> + return assigned; >> +} >> + > >