On Tue, Apr 09, 2024 at 05:57:16AM -0700, Breno Leitao wrote:
> It is impossible to use init_dummy_netdev together with alloc_netdev()
> as the 'setup' argument.
>
> This is because alloc_netdev() initializes some fields in the net_device
> structure, and later init_dummy_netdev() memzero them all. This causes
> some problems as reported here:
>
> https://lore.kernel.org/all/20240322082336.49f110cc@xxxxxxxxxx/
>
> Split the init_dummy_netdev() function in two. Create a new function called
> init_dummy_netdev_core() that does not memzero the net_device structure.
> Then have init_dummy_netdev() memzero-ing and calling
> init_dummy_netdev_core(), keeping the old behaviour.
>
> init_dummy_netdev_core() is the new function that could be called as an
> argument for alloc_netdev().
>
> Also, create a helper to allocate and initialize dummy net devices,
> leveraging init_dummy_netdev_core() as the setup argument. This function
> basically simplify the allocation of dummy devices, by allocating and
> initializing it. Freeing the device continue to be done through
> free_netdev()
>
> Suggested-by: Jakub Kicinski <kuba@xxxxxxxxxx>
> Signed-off-by: Breno Leitao <leitao@xxxxxxxxxx>

Reviewed-by: Ido Schimmel <idosch@xxxxxxxxxx>

We were about to submit another user of init_dummy_netdev() when I
noticed this patch. Converted the code to use alloc_netdev_dummy() [1]
and it seems to be working fine. Will submit after your patch is
accepted.

See a few minor comments below.

[...]

> +/**
> + * init_dummy_netdev - init a dummy network device for NAPI
> + * @dev: device to init
> + *
> + * This takes a network device structure and initialize the minimum

s/initialize/initializes/

> + * amount of fields so it can be used to schedule NAPI polls without
> + * registering a full blown interface. This is to be used by drivers
> + * that need to tie several hardware interfaces to a single NAPI
> + * poll scheduler due to HW limitations.
> + */
> +void init_dummy_netdev(struct net_device *dev)
> +{
> +	/* Clear everything. Note we don't initialize spinlocks
> +	 * are they aren't supposed to be taken by any of the

I assume you meant s/are/as/ ?

> +	 * NAPI code and this dummy netdev is supposed to be
> +	 * only ever used for NAPI polls
> +	 */
> +	memset(dev, 0, sizeof(struct net_device));
> +	init_dummy_netdev_core(dev);
> +}
> +EXPORT_SYMBOL_GPL(init_dummy_netdev);

[1]
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index db2950baf6b4..bf66d996e32e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -132,20 +132,40 @@ struct mlxsw_pci {
 	u8 num_cqs; /* Number of CQs */
 	u8 num_sdqs; /* Number of SDQs */
 	bool skip_reset;
-	struct net_device napi_dev_tx;
-	struct net_device napi_dev_rx;
+	struct net_device *napi_dev_tx;
+	struct net_device *napi_dev_rx;
 };

-static void mlxsw_pci_napi_devs_init(struct mlxsw_pci *mlxsw_pci)
+static int mlxsw_pci_napi_devs_init(struct mlxsw_pci *mlxsw_pci)
 {
-	init_dummy_netdev(&mlxsw_pci->napi_dev_tx);
-	strscpy(mlxsw_pci->napi_dev_tx.name, "mlxsw_tx",
-		sizeof(mlxsw_pci->napi_dev_tx.name));
+	int err;
+
+	mlxsw_pci->napi_dev_tx = alloc_netdev_dummy(0);
+	if (!mlxsw_pci->napi_dev_tx)
+		return -ENOMEM;
+	strscpy(mlxsw_pci->napi_dev_tx->name, "mlxsw_tx",
+		sizeof(mlxsw_pci->napi_dev_tx->name));
+
+	mlxsw_pci->napi_dev_rx = alloc_netdev_dummy(0);
+	if (!mlxsw_pci->napi_dev_rx) {
+		err = -ENOMEM;
+		goto err_alloc_rx;
+	}
+	strscpy(mlxsw_pci->napi_dev_rx->name, "mlxsw_rx",
+		sizeof(mlxsw_pci->napi_dev_rx->name));
+	dev_set_threaded(mlxsw_pci->napi_dev_rx, true);
+
+	return 0;

-	init_dummy_netdev(&mlxsw_pci->napi_dev_rx);
-	strscpy(mlxsw_pci->napi_dev_rx.name, "mlxsw_rx",
-		sizeof(mlxsw_pci->napi_dev_rx.name));
-	dev_set_threaded(&mlxsw_pci->napi_dev_rx, true);
+err_alloc_rx:
+	free_netdev(mlxsw_pci->napi_dev_tx);
+	return err;
+}
+
+static void 
mlxsw_pci_napi_devs_fini(struct mlxsw_pci *mlxsw_pci)
+{
+	free_netdev(mlxsw_pci->napi_dev_rx);
+	free_netdev(mlxsw_pci->napi_dev_tx);
 }

 static char *__mlxsw_pci_queue_elem_get(struct mlxsw_pci_queue *q,
@@ -804,11 +824,11 @@ static void mlxsw_pci_cq_napi_setup(struct mlxsw_pci_queue *q,

 	switch (cq_type) {
 	case MLXSW_PCI_CQ_SDQ:
-		netif_napi_add(&mlxsw_pci->napi_dev_tx, &q->u.cq.napi,
+		netif_napi_add(mlxsw_pci->napi_dev_tx, &q->u.cq.napi,
 			       mlxsw_pci_napi_poll_cq_tx);
 		break;
 	case MLXSW_PCI_CQ_RDQ:
-		netif_napi_add(&mlxsw_pci->napi_dev_rx, &q->u.cq.napi,
+		netif_napi_add(mlxsw_pci->napi_dev_rx, &q->u.cq.napi,
 			       mlxsw_pci_napi_poll_cq_rx);
 		break;
 	}
@@ -1793,7 +1813,10 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
 	if (err)
 		goto err_requery_resources;

-	mlxsw_pci_napi_devs_init(mlxsw_pci);
+	err = mlxsw_pci_napi_devs_init(mlxsw_pci);
+	if (err)
+		goto err_napi_devs_init;
+
 	err = mlxsw_pci_aqs_init(mlxsw_pci, mbox);
 	if (err)
 		goto err_aqs_init;
@@ -1811,6 +1834,8 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
 err_request_eq_irq:
 	mlxsw_pci_aqs_fini(mlxsw_pci);
 err_aqs_init:
+	mlxsw_pci_napi_devs_fini(mlxsw_pci);
+err_napi_devs_init:
 err_requery_resources:
 err_config_profile:
 err_cqe_v_check:
@@ -1838,6 +1863,7 @@ static void mlxsw_pci_fini(void *bus_priv)

 	free_irq(pci_irq_vector(mlxsw_pci->pdev, 0), mlxsw_pci);
 	mlxsw_pci_aqs_fini(mlxsw_pci);
+	mlxsw_pci_napi_devs_fini(mlxsw_pci);
 	mlxsw_pci_fw_area_fini(mlxsw_pci);
 	mlxsw_pci_free_irq_vectors(mlxsw_pci);
 }