Re: [PATCH rdma-next v2 09/11] RDMA/erdma: Add the erdma module

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 1/17/22 11:22 PM, Jason Gunthorpe wrote:
On Mon, Jan 17, 2022 at 04:48:26PM +0800, Cheng Xu wrote:
Add the main erdma module and debugfs files. The main module provides
interface to infiniband subsytem, and the debugfs module provides a way
to allow user can get the core status of the device and set the preferred
congestion control algorithm.

Signed-off-by: Cheng Xu <chengyou@xxxxxxxxxxxxxxxxx>
  drivers/infiniband/hw/erdma/erdma_main.c | 707 +++++++++++++++++++++++
  1 file changed, 707 insertions(+)
  create mode 100644 drivers/infiniband/hw/erdma/erdma_main.c

diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
new file mode 100644
index 000000000000..e35902a145b3
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -0,0 +1,707 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+
+/* Authors: Cheng Xu <chengyou@xxxxxxxxxxxxxxxxx> */
+/*          Kai Shen <kaishen@xxxxxxxxxxxxxxxxx> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <net/addrconf.h>
+#include <rdma/erdma-abi.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "erdma.h"
+#include "erdma_cm.h"
+#include "erdma_hw.h"
+#include "erdma_verbs.h"
+
+#define DESC __stringify(ElasticRDMA(iWarp) Driver)
+
+MODULE_AUTHOR("Alibaba");
+MODULE_DESCRIPTION(DESC);
+MODULE_LICENSE("GPL v2");
+
+/*Common string that is matched to accept the device by the user library*/
+#define ERDMA_NODE_DESC_COMMON "Elastic RDMA(iWARP) stack"
+
+static struct list_head erdma_dev_list = LIST_HEAD_INIT(erdma_dev_list);
+static DEFINE_MUTEX(erdma_dev_mutex);

No lists of devices in drivers. Use the core code to do it.

Will fix.


+static int erdma_newlink(const char *dev_name, struct net_device *netdev)
+{
+	struct ib_device *ibdev;
+	struct erdma_pci_drvdata *drvdata, *tmp;
+	struct erdma_dev *dev;
+	int ret;
+
+	ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_ERDMA);
+	if (ibdev) {
+		ib_device_put(ibdev);
+		return -EEXIST;
+	}
+
+	list_for_each_entry_safe(drvdata, tmp, &erdma_dev_list, list) {
+		if (erdma_netdev_matched_edev(netdev, drvdata) && !drvdata->is_registered) {
+			dev = erdma_ib_device_create(drvdata, netdev);
+			if (IS_ERR(dev)) {
+				pr_info("add device failed\n");
+				return PTR_ERR(dev);
+			}
+
+			if (netif_running(netdev) && netif_carrier_ok(netdev))
+				dev->state = IB_PORT_ACTIVE;
+			else
+				dev->state = IB_PORT_DOWN;
+
+			ret = erdma_device_register(dev, dev_name);
+			if (ret) {
+				ib_dealloc_device(&dev->ibdev);
+				return ret;
+			}
+
+			drvdata->is_registered = 1;
+			drvdata->dev = dev;
+
+			return 0;
+		}
+	}

Leaks a ibdev reference

Will fix.


+static int erdma_probe_dev(struct pci_dev *pdev)
+{
+	int err;
+	struct erdma_pci_drvdata *drvdata;
+	u32 version;
+	int bars;
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err);
+		return err;
+	}
+
+	pci_set_master(pdev);
+
+	drvdata = kcalloc(1, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata) {
+		err = -ENOMEM;
+		goto err_disable_device;
+	}
+
+	pci_set_drvdata(pdev, drvdata);

No, the drvdata should be the struct that has the ibdevice, do not
have two structs like this.

The only use of drvdata should be in the remove function.

Yes, I will fix this.


+static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	return erdma_probe_dev(pdev);
+}

Don't make wrappers like this..

Will fix.


+static void erdma_remove(struct pci_dev *pdev)
+{
+	struct erdma_pci_drvdata *drvdata = pci_get_drvdata(pdev);
+
+	if (drvdata->is_registered) {
+		ib_unregister_device(&drvdata->dev->ibdev);
+		drvdata->is_registered = 0;
+	}
+
+	erdma_remove_dev(pdev);
+}

Or this

I will check it, and fix all of them.


+static __init int erdma_init_module(void)
+{
+	int ret;
+
+	ret = erdma_cm_init();
+	if (ret)
+		return ret;
+
+	ret = pci_register_driver(&erdma_pci_driver);
+	if (ret) {
+		pr_err("Couldn't register erdma driver.\n");
+		goto uninit_cm;
+	}
+
+	ret = register_netdevice_notifier(&erdma_netdev_nb);
+	if (ret)
+		goto unregister_driver;

And notifiers should not be registered without devices.

I'm confused about this. irdma/bnxt_re/siw/rxe register net notifiers in their module_init, and get their ibdev structures by function 'ib_device_get_by_netdev'. Other drivers (mlx4/mlx5/hns) register notifiers with devices.

The two ways have the same functionality, and register with modules has less notifiers with lots of IB devices.

Could you confirm this? And I will fix it if you are sure about it.


+static void __exit erdma_exit_module(void)
+{
+	rdma_link_unregister(&erdma_link_ops);
+	ib_unregister_driver(RDMA_DRIVER_ERDMA);

A PCI driver should not call this function, this is all messed up
because you are trying to dynamically create a PCI functions IB
device.

The PCI function should always create the IB device, and it may remain
in some inoperating state (eg no GID table entries) until the pair'd
netdev is found, and the lifecycle of the IB device should always
follow the PCI device.

Jason

Suggestion and tip are very nice, thank you. I will verify this, and use
it if it works.

Thanks,
Cheng Xu



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux