-----"Jason Gunthorpe" <jgg@xxxxxxxx> wrote: ----- >To: "Bernard Metzler" <bmt@xxxxxxxxxxxxxx> >From: "Jason Gunthorpe" <jgg@xxxxxxxx> >Date: 02/22/2019 01:14AM >Cc: linux-rdma@xxxxxxxxxxxxxxx >Subject: Re: [PATCH v5 03/13] SIW network and RDMA core interface > >On Tue, Feb 19, 2019 at 11:08:53AM +0100, Bernard Metzler wrote: >> +static struct siw_device *siw_device_create(struct net_device >*netdev) >> +{ >> + struct siw_device *sdev; >> + struct ib_device *base_dev; >> + struct device *parent = netdev->dev.parent; >> + int rv; >> + >> + sdev = ib_alloc_device(siw_device, base_dev); >> + if (!sdev) >> + goto out; >> + >> + base_dev = &sdev->base_dev; >> + >> + if (!parent) { >> + /* >> + * The loopback device has no parent device, >> + * so it appears as a top-level device. To support >> + * loopback device connectivity, take this device >> + * as the parent device. Skip all other devices >> + * w/o parent device. >> + */ >> + if (netdev->type != ARPHRD_LOOPBACK) { >> + pr_warn("siw: device %s skipped (no parent dev)\n", >> + netdev->name); >> + ib_dealloc_device(base_dev); >> + sdev = NULL; > >This should have the ib_dealloc_device in the goto error section Right, thanks. Also true for other cases in that function, as you point out below. Will simplify accordingly. > >> + goto out; >> + } >> + parent = &netdev->dev; >> + } >> + base_dev->iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); >> + if (!base_dev->iwcm) { >> + ib_dealloc_device(base_dev); >> + sdev = NULL; >> + goto out; >> + } >> + >> + sdev->netdev = netdev; >> + >> + memset(&base_dev->node_guid, 0, sizeof(base_dev->node_guid)); > >base_dev is already zero'd Okay, will remove. > >> + ib_set_device_ops(base_dev, &siw_device_ops); >> + rv = ib_device_set_netdev(base_dev, netdev, 1); >> + if (rv) { >> + kfree(base_dev->iwcm); >> + ib_dealloc_device(base_dev); >> + sdev = NULL; >> + goto out; > >Same comment > >> + } >> + base_dev->iwcm->connect = siw_connect; >> + base_dev->iwcm->accept = siw_accept; >> + base_dev->iwcm->reject = siw_reject; >> + base_dev->iwcm->create_listen = siw_create_listen; >> + base_dev->iwcm->destroy_listen = siw_destroy_listen; >> + base_dev->iwcm->add_ref = siw_qp_get_ref; >> + base_dev->iwcm->rem_ref = siw_qp_put_ref; >> + base_dev->iwcm->get_qp = siw_get_base_qp; >> + >> + /* Disable TCP port mapper service */ >> + base_dev->iwcm->driver_flags = IW_F_NO_PORT_MAP; >> + >> + memcpy(base_dev->iwcm->ifname, netdev->name, >> + sizeof(base_dev->iwcm->ifname)); >> + >> + sdev->attrs.max_qp = SIW_MAX_QP; >> + sdev->attrs.max_qp_wr = SIW_MAX_QP_WR; >> + sdev->attrs.max_ord = SIW_MAX_ORD_QP; >> + sdev->attrs.max_ird = SIW_MAX_IRD_QP; >> + sdev->attrs.max_sge = SIW_MAX_SGE; >> + sdev->attrs.max_sge_rd = SIW_MAX_SGE_RD; >> + sdev->attrs.max_cq = SIW_MAX_CQ; >> + sdev->attrs.max_cqe = SIW_MAX_CQE; >> + sdev->attrs.max_mr = SIW_MAX_MR; >> + sdev->attrs.max_pd = SIW_MAX_PD; >> + sdev->attrs.max_mw = SIW_MAX_MW; >> + sdev->attrs.max_fmr = SIW_MAX_FMR; >> + sdev->attrs.max_srq = SIW_MAX_SRQ; >> + sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR; >> + sdev->attrs.max_srq_sge = SIW_MAX_SGE; >> + >> + siw_idr_init(sdev); > >Why idr_init/release functions? Seems unnecessary idr structs are allocated for each dynamically allocated siw device (one device per link). So each device has its own id space (e.g. for STags/Keys). So I think I need to initialize/destroy the idr resources dynamically as well. > >> +/* >> + * Network link becomes unavailable. Mark all >> + * affected QP's accordingly. >> + */ >> +static void siw_netdev_down(struct work_struct *work) >> +{ >> + struct siw_device *sdev = container_of(work, struct siw_device, >> + netdev_down); >> + >> + struct siw_qp_attrs qp_attrs; >> + struct list_head *pos, *tmp; >> + >> + memset(&qp_attrs, 0, sizeof(qp_attrs)); >> + qp_attrs.state = SIW_QP_STATE_ERROR; >> + >> + list_for_each_safe(pos, tmp, &sdev->qp_list) { >> + struct siw_qp *qp = list_entry(pos, struct siw_qp, devq); >> + >> + down_write(&qp->state_lock); >> + (void) siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE); > >I always wonder if '(void) ' casts really want to be WARN_ONs.. > Sounds good. Will do. >> + up_write(&qp->state_lock); >> + } >> + ib_device_put(&sdev->base_dev); > >Not super excited to see the device refount held into a work .. This >could create deadlock if you are careless... > >> +static void siw_device_goes_down(struct siw_device *sdev) >> +{ >> + if (ib_device_try_get(&sdev->base_dev)) { >> + INIT_WORK(&sdev->netdev_down, siw_netdev_down); >> + schedule_work(&sdev->netdev_down); > >.. ie here it should be the system unbound wq, but that would >deadlock >with ib_unregister_device_queued .. > Hmm, can you please help me understanding that concern? I agree I should check if schedule_work() succeeds (I have to fix that, but it should never happen), but otherwise? How can it deadlock? >> + } >> +} >> + >> +static int siw_netdev_event(struct notifier_block *nb, unsigned >long event, >> + void *arg) >> +{ >> + struct net_device *netdev = netdev_notifier_info_to_dev(arg); >> + struct ib_device *base_dev; >> + struct siw_device *sdev; >> + >> + dev_dbg(&netdev->dev, "siw: event %lu\n", event); >> + >> + if (dev_net(netdev) != &init_net) >> + return NOTIFY_OK; >> + >> + base_dev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_SIW); >> + if (!base_dev) >> + return NOTIFY_OK; >> + >> + sdev = to_siw_dev(base_dev); >> + >> + switch (event) { >> + >> + case NETDEV_UP: >> + sdev->state = IB_PORT_ACTIVE; >> + siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE); >> + break; >> + >> + case NETDEV_GOING_DOWN: >> + siw_device_goes_down(sdev); >> + break; > >This probably wants to be return as there is a double put >otherwise. Never tested? > I tested bringing down interfaces, also while there is siw caused traffic on it. siw_device_goes_down() calls ib_device_try_get(), which schedules work only if it got that additional reference (which then get's put in the work handler). So I don't see a potential double put case (yet)...? >> +static int siw_newlink(const char *basedev_name, struct net_device >*netdev) >> +{ >> + struct ib_device *base_dev; >> + struct siw_device *sdev; >> + int rv; >> + >> + base_dev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_SIW); >> + if (base_dev) { >> + ib_device_put(base_dev); >> + rv = -EEXIST; >> + goto out; >> + } >> + if (!siw_dev_qualified(netdev)) { >> + rv = -EINVAL; >> + goto out; >> + } >> + sdev = siw_device_create(netdev); >> + if (sdev) { >> + dev_dbg(&netdev->dev, "siw: new device\n"); >> + >> + if (netif_running(netdev) && netif_carrier_ok(netdev)) >> + sdev->state = IB_PORT_ACTIVE; >> + else >> + sdev->state = IB_PORT_DOWN; >> + >> + rv = siw_device_register(sdev, basedev_name); >> + if (rv) { >> + siw_device_cleanup(&sdev->base_dev); >> + ib_dealloc_device(&sdev->base_dev); >> + goto out; > >consistently use goto unwinds everywhere? OK, let's do that. > >> +static void __exit siw_exit_module(void) >> +{ >> + int nr_cpu; >> + >> + for (nr_cpu = 0; nr_cpu < nr_cpu_ids; nr_cpu++) { >> + if (siw_tx_thread[nr_cpu]) { >> + siw_stop_tx_thread(nr_cpu); >> + siw_tx_thread[nr_cpu] = NULL; >> + } >> + } > >Make sure this doesn't deadlock with ib_unregister_driver.. Is >forward >progress of unregister guarenteed if the tx threads are stopped? I thought yes, but better let me think through it again and test that. > >> + unregister_netdevice_notifier(&siw_netdev_nb); >> + rdma_link_unregister(&siw_link_ops); >> + ib_unregister_driver(RDMA_DRIVER_SIW); >> + >> + siw_cm_exit(); >> + >> + if (siw_crypto_shash) >> + crypto_free_shash(siw_crypto_shash); >> + >> + siw_destroy_cpulist(); >> + >> + pr_info("SoftiWARP detached\n"); >> +} >> + >> +module_init(siw_init_module); >> +module_exit(siw_exit_module); >> + >> +MODULE_ALIAS_RDMA_LINK("siw"); > >