RE: [PATCH v3 03/13] Attach/detach SoftiWarp to/from network and RDMA subsystem

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> Subject: [PATCH v3 03/13] Attach/detach SoftiWarp to/from network and RDMA
> subsystem
> 
> Signed-off-by: Bernard Metzler <bmt@xxxxxxxxxxxxxx>
> ---
>  drivers/infiniband/sw/siw/siw_main.c | 816
> +++++++++++++++++++++++++++++++++++
>  1 file changed, 816 insertions(+)
>  create mode 100644 drivers/infiniband/sw/siw/siw_main.c
> 
> diff --git a/drivers/infiniband/sw/siw/siw_main.c
> b/drivers/infiniband/sw/siw/siw_main.c
> new file mode 100644
> index 000000000000..1b7fc58d4eb9
> --- /dev/null
> +++ b/drivers/infiniband/sw/siw/siw_main.c
> @@ -0,0 +1,816 @@
> +/*
> + * Software iWARP device driver
> + *
> + * Authors: Bernard Metzler <bmt@xxxxxxxxxxxxxx>
> + *
> + * Copyright (c) 2008-2017, IBM Corporation
> + *
> + * This software is available to you under a choice of one of two
> + * licenses. You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * BSD license below:
> + *
> + *   Redistribution and use in source and binary forms, with or
> + *   without modification, are permitted provided that the following
> + *   conditions are met:
> + *
> + *   - Redistributions of source code must retain the above copyright
notice,
> + *     this list of conditions and the following disclaimer.
> + *
> + *   - Redistributions in binary form must reproduce the above copyright
> + *     notice, this list of conditions and the following disclaimer in
the
> + *     documentation and/or other materials provided with the
distribution.
> + *
> + *   - Neither the name of IBM nor the names of its contributors may be
> + *     used to endorse or promote products derived from this software
without
> + *     specific prior written permission.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
> OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
> HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
> IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> THE
> + * SOFTWARE.
> + */
> +
> +#include <linux/init.h>
> +#include <linux/errno.h>
> +#include <linux/netdevice.h>
> +#include <linux/inetdevice.h>
> +#include <net/net_namespace.h>
> +#include <linux/rtnetlink.h>
> +#include <linux/if_arp.h>
> +#include <linux/list.h>
> +#include <linux/kernel.h>
> +#include <linux/dma-mapping.h>
> +
> +#include <rdma/ib_verbs.h>
> +#include <rdma/ib_smi.h>
> +#include <rdma/ib_user_verbs.h>
> +
> +#include "siw.h"
> +#include "siw_obj.h"
> +#include "siw_cm.h"
> +#include "siw_verbs.h"
> +#include <linux/kthread.h>
> +
> +MODULE_AUTHOR("Bernard Metzler");
> +MODULE_DESCRIPTION("Software iWARP Driver");
> +MODULE_LICENSE("Dual BSD/GPL");
> +MODULE_VERSION("0.2");
> +
> +/* transmit from user buffer, if possible */
> +const bool zcopy_tx;
> +
> +/* Restrict usage of GSO, if hardware peer iwarp is unable to process
> + * large packets. gso_seg_limit = 1 lets siw send only packets up to
> + * one real MTU in size, but severely limits maximum bandwidth.
> + * gso_seg_limit = 0 makes use of GSO (and more than doubles throughput
> + * for large transfers).
> + */
> +const int gso_seg_limit;
> +

The GSO configuration needs to default to a value that enables interoperation
with all vendors (and complies with the RFCs).  So make it 1 please.

Jason, would configfs be a reasonable way to allow tweaking these globals?

> +/* Attach siw also with loopback devices */
> +const bool loopback_enabled = true;
> +

I think I asked this before.  Why have a knob to enable/disable loopback?

> +/* We try to negotiate CRC on, if true */
> +const bool mpa_crc_required;
> +
> +/* MPA CRC on/off enforced */
> +const bool mpa_crc_strict;
> +
> +/* Set TCP_NODELAY, and push messages asap */
> +const bool siw_lowdelay = true;
> +/* Set TCP_QUICKACK */
> +const bool tcp_quickack;
> +
> +/* Select MPA version to be used during connection setup */
> +u_char mpa_version = MPA_REVISION_2;
> +
> +/* Selects MPA P2P mode (additional handshake during connection
> + * setup), if true
> + */
> +const bool peer_to_peer;
> +
> +static LIST_HEAD(siw_devlist);
> +
> +struct task_struct *siw_tx_thread[NR_CPUS];
> +struct crypto_shash *siw_crypto_shash;
> +
> +static ssize_t show_sw_version(struct device *dev,
> +			       struct device_attribute *attr, char *buf)
> +{
> +	struct siw_device *sdev = container_of(dev, struct siw_device,
> +					       base_dev.dev);
> +
> +	return sprintf(buf, "%x\n", sdev->attrs.version);
> +}
> +
> +static DEVICE_ATTR(sw_version, 0444, show_sw_version, NULL);
> +
> +static struct device_attribute *siw_dev_attributes[] = {
> +	&dev_attr_sw_version
> +};
> +
> +static int siw_modify_port(struct ib_device *base_dev, u8 port, int mask,
> +			   struct ib_port_modify *props)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
> +static int siw_device_register(struct siw_device *sdev)
> +{
> +	struct ib_device *base_dev = &sdev->base_dev;
> +	int rv, i;
> +	static int dev_id = 1;
> +
> +	rv = ib_register_device(base_dev, NULL);
> +	if (rv) {
> +		pr_warn("siw: %s: registration error %d\n",
> +			base_dev->name, rv);
> +		return rv;
> +	}
> +
> +	for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i) {
> +		rv = device_create_file(&base_dev->dev,
siw_dev_attributes[i]);
> +		if (rv) {
> +			pr_warn("siw: %s: create file error: rv=%d\n",
> +				base_dev->name, rv);
> +			ib_unregister_device(base_dev);
> +			return rv;
> +		}
> +	}
> +	siw_debugfs_add_device(sdev);
> +
> +	sdev->attrs.vendor_part_id = dev_id++;
> +
> +	siw_dbg(sdev, "HWaddr=%02x.%02x.%02x.%02x.%02x.%02x\n",
> +		*(u8 *)sdev->netdev->dev_addr,
> +		*((u8 *)sdev->netdev->dev_addr + 1),
> +		*((u8 *)sdev->netdev->dev_addr + 2),
> +		*((u8 *)sdev->netdev->dev_addr + 3),
> +		*((u8 *)sdev->netdev->dev_addr + 4),
> +		*((u8 *)sdev->netdev->dev_addr + 5));
> +
> +	sdev->is_registered = 1;
> +
> +	return 0;
> +}
> +
> +static void siw_device_deregister(struct siw_device *sdev)
> +{
> +	int i;
> +
> +	siw_debugfs_del_device(sdev);
> +
> +	if (sdev->is_registered) {
> +
> +		siw_dbg(sdev, "deregister\n");
> +
> +		for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i)
> +			device_remove_file(&sdev->base_dev.dev,
> +					   siw_dev_attributes[i]);
> +
> +		ib_unregister_device(&sdev->base_dev);
> +	}
> +	if (atomic_read(&sdev->num_ctx) || atomic_read(&sdev->num_srq) ||
> +	    atomic_read(&sdev->num_mr) || atomic_read(&sdev->num_cep) ||
> +	    atomic_read(&sdev->num_qp) || atomic_read(&sdev->num_cq) ||
> +	    atomic_read(&sdev->num_pd)) {
> +		pr_warn("siw at %s: orphaned resources!\n",
> +			sdev->netdev->name);
> +		pr_warn("           CTX %d, SRQ %d, QP %d, CQ %d, MEM %d,
CEP
> %d, PD %d\n",
> +			atomic_read(&sdev->num_ctx),
> +			atomic_read(&sdev->num_srq),
> +			atomic_read(&sdev->num_qp),
> +			atomic_read(&sdev->num_cq),
> +			atomic_read(&sdev->num_mr),
> +			atomic_read(&sdev->num_cep),
> +			atomic_read(&sdev->num_pd));
> +	}
> +
> +	while (!list_empty(&sdev->cep_list)) {
> +		struct siw_cep *cep = list_entry(sdev->cep_list.next,
> +						 struct siw_cep, devq);
> +		list_del(&cep->devq);
> +		pr_warn("siw: at %s: free orphaned CEP 0x%p, state %d\n",
> +			sdev->base_dev.name, cep, cep->state);
> +		kfree(cep);
> +	}
> +	sdev->is_registered = 0;
> +}
> +
> +static void siw_device_destroy(struct siw_device *sdev)
> +{
> +	siw_dbg(sdev, "destroy device\n");
> +	siw_idr_release(sdev);
> +
> +	kfree(sdev->base_dev.iwcm);
> +	dev_put(sdev->netdev);
> +
> +	ib_dealloc_device(&sdev->base_dev);
> +}
> +
> +static struct siw_device *siw_dev_from_netdev(struct net_device *dev)
> +{
> +	if (!list_empty(&siw_devlist)) {
> +		struct list_head *pos;
> +
> +		list_for_each(pos, &siw_devlist) {
> +			struct siw_device *sdev =
> +				list_entry(pos, struct siw_device, list);
> +			if (sdev->netdev == dev)
> +				return sdev;
> +		}
> +	}
> +	return NULL;
> +}
> +
> +static int siw_create_tx_threads(void)
> +{
> +	int cpu, rv, assigned = 0;
> +
> +	for_each_online_cpu(cpu) {
> +		/* Skip HT cores */
> +		if (cpu % cpumask_weight(topology_sibling_cpumask(cpu))) {
> +			siw_tx_thread[cpu] = NULL;
> +			continue;
> +		}
> +		siw_tx_thread[cpu] = kthread_create(siw_run_sq,
> +						   (unsigned long
*)(long)cpu,
> +						   "siw_tx/%d", cpu);
> +		if (IS_ERR(siw_tx_thread[cpu])) {
> +			rv = PTR_ERR(siw_tx_thread[cpu]);
> +			siw_tx_thread[cpu] = NULL;
> +			pr_info("Creating TX thread for CPU %d failed",
cpu);
> +			continue;
> +		}
> +		kthread_bind(siw_tx_thread[cpu], cpu);
> +
> +		wake_up_process(siw_tx_thread[cpu]);
> +		assigned++;
> +	}
> +	return assigned;
> +}
> +

I know in the v2 review you discussed the TX threads.  And you mentioned you
had tried workq threads [1], but they introduced lots of delay.  Have you
taken another look at the workq implementation?  If your analysis is several
years old, workq threads might provide what you need nowadays...

[1] https://www.spinics.net/lists/linux-rdma/msg55646.html

 
Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux