Re: [PATCH for-next v1 03/12] SIW network and RDMA core interface

Leon Romanovsky <leon@xxxxxxxxxx> · Mon, 10 Jun 2019 08:40:35 +0300

On Sun, May 26, 2019 at 01:41:47PM +0200, Bernard Metzler wrote:
> Signed-off-by: Bernard Metzler <bmt@xxxxxxxxxxxxxx>
> ---
>  drivers/infiniband/sw/siw/siw_main.c | 701 +++++++++++++++++++++++++++
>  1 file changed, 701 insertions(+)
>  create mode 100644 drivers/infiniband/sw/siw/siw_main.c
>
> diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
> new file mode 100644
> index 000000000000..a9b8a5d2aaa3
> --- /dev/null
> +++ b/drivers/infiniband/sw/siw/siw_main.c
> @@ -0,0 +1,701 @@
> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
> +
> +/* Authors: Bernard Metzler <bmt@xxxxxxxxxxxxxx> */
> +/* Copyright (c) 2008-2019, IBM Corporation */
> +
> +#include <linux/init.h>
> +#include <linux/errno.h>
> +#include <linux/netdevice.h>
> +#include <linux/inetdevice.h>
> +#include <net/net_namespace.h>
> +#include <linux/rtnetlink.h>
> +#include <linux/if_arp.h>
> +#include <linux/list.h>
> +#include <linux/kernel.h>
> +#include <linux/dma-mapping.h>
> +
> +#include <rdma/ib_verbs.h>
> +#include <rdma/ib_smi.h>
> +#include <rdma/ib_user_verbs.h>
> +#include <rdma/rdma_netlink.h>
> +#include <linux/kthread.h>
> +
> +#include "siw.h"
> +#include "siw_cm.h"
> +#include "siw_verbs.h"
> +#include "siw_debug.h"
> +
> +MODULE_AUTHOR("Bernard Metzler");
> +MODULE_DESCRIPTION("Software iWARP Driver");
> +MODULE_LICENSE("Dual BSD/GPL");
> +
> +/* transmit from user buffer, if possible */
> +const bool zcopy_tx = true;
> +
> +/* Restrict usage of GSO, if hardware peer iwarp is unable to process
> + * large packets. try_gso = true lets siw try to use local GSO,
> + * if peer agrees.  Not using GSO severly limits siw maximum tx bandwidth.
> + */
> +const bool try_gso;
> +
> +/* Attach siw also with loopback devices */
> +const bool loopback_enabled = true;
> +
> +/* We try to negotiate CRC on, if true */
> +const bool mpa_crc_required;
> +
> +/* MPA CRC on/off enforced */
> +const bool mpa_crc_strict;
> +
> +/* Control TCP_NODELAY socket option */
> +const bool siw_tcp_nagle;
> +
> +/* Select MPA version to be used during connection setup */
> +u_char mpa_version = MPA_REVISION_2;
> +
> +/* Selects MPA P2P mode (additional handshake during connection
> + * setup, if true.
> + */
> +const bool peer_to_peer;
> +
> +struct task_struct *siw_tx_thread[NR_CPUS];
> +struct crypto_shash *siw_crypto_shash;
> +
> +static int siw_device_register(struct siw_device *sdev, const char *name)
> +{
> +	struct ib_device *base_dev = &sdev->base_dev;
> +	static int dev_id = 1;
> +	int rv;
> +
> +	base_dev->driver_id = RDMA_DRIVER_SIW;
> +
> +	rv = ib_register_device(base_dev, name);
> +	if (rv) {
> +		pr_warn("siw: device registration error %d\n", rv);
> +		return rv;
> +	}
> +	sdev->vendor_part_id = dev_id++;
> +
> +	siw_dbg(base_dev, "HWaddr=%pM\n", sdev->netdev->dev_addr);
> +
> +	return 0;
> +}
> +
> +static void siw_device_cleanup(struct ib_device *base_dev)
> +{
> +	struct siw_device *sdev = to_siw_dev(base_dev);
> +
> +	siw_dbg(base_dev, "Cleanup device\n");
> +
> +	if (atomic_read(&sdev->num_ctx) || atomic_read(&sdev->num_srq) ||
> +	    atomic_read(&sdev->num_mr) || atomic_read(&sdev->num_cep) ||
> +	    atomic_read(&sdev->num_qp) || atomic_read(&sdev->num_cq) ||
> +	    atomic_read(&sdev->num_pd)) {
> +		pr_warn("siw at %s: orphaned resources!\n", sdev->netdev->name);
> +		pr_warn("           CTX %d, SRQ %d, QP %d, CQ %d, MEM %d, CEP %d, PD %d\n",
> +			atomic_read(&sdev->num_ctx),
> +			atomic_read(&sdev->num_srq), atomic_read(&sdev->num_qp),
> +			atomic_read(&sdev->num_cq), atomic_read(&sdev->num_mr),
> +			atomic_read(&sdev->num_cep),
> +			atomic_read(&sdev->num_pd));
> +	}

We already talked about it, most of this code is redundant due to restrack and it should be removed.

> +	while (!list_empty(&sdev->cep_list)) {
> +		struct siw_cep *cep =
> +			list_entry(sdev->cep_list.next, struct siw_cep, devq);
> +		list_del(&cep->devq);
> +		pr_warn("siw: at %s: free orphaned CEP 0x%p, state %d\n",
> +			sdev->base_dev.name, cep, cep->state);

Does it mean that SIW leak memory? If cep can be not-empty at this
stage, what will be the purpose of pr_warn? If it can't, it is better
to find memory leak.

> +		kfree(cep);
> +	}
> +	xa_destroy(&sdev->qp_xa);
> +	xa_destroy(&sdev->mem_xa);
> +}
> +
> +static int siw_create_tx_threads(void)
> +{
> +	int cpu, rv, assigned = 0;
> +
> +	for_each_online_cpu(cpu) {
> +		/* Skip HT cores */
> +		if (cpu % cpumask_weight(topology_sibling_cpumask(cpu))) {
> +			siw_tx_thread[cpu] = NULL;
> +			continue;
> +		}
> +		siw_tx_thread[cpu] =
> +			kthread_create(siw_run_sq, (unsigned long *)(long)cpu,
> +				       "siw_tx/%d", cpu);

I don't decide here, but just for the record, creating kernel threads
for every CPU is wrong.

> +		if (IS_ERR(siw_tx_thread[cpu])) {
> +			rv = PTR_ERR(siw_tx_thread[cpu]);
> +			siw_tx_thread[cpu] = NULL;
> +			pr_info("Creating TX thread for CPU %d failed", cpu);
> +			continue;
> +		}
> +		kthread_bind(siw_tx_thread[cpu], cpu);
> +
> +		wake_up_process(siw_tx_thread[cpu]);
> +		assigned++;
> +	}
> +	return assigned;
> +}
> +