Fix races between ravb_tx_timeout_work() and functions of net_device_ops and ethtool_ops by using rtnl_trylock() and rtnl_unlock(). Note that since ravb_close() is under the rtnl lock and calls cancel_work_sync(), ravb_tx_timeout_work() should call rtnl_trylock() instead of rtnl_lock(). Otherwise, a deadlock may happen in ravb_tx_timeout_work() like below: CPU0 CPU1 ravb_tx_timeout() schedule_work() ... __dev_close_many() // Under rtnl lock ravb_close() cancel_work_sync() // Waiting ravb_tx_timeout_work() rtnl_lock() // This can cause a deadlock Also, if rtnl_trylock() fails and the netif is still running, reschedule the work with a 10 msec delay. For that reason, use schedule_delayed_work() instead of schedule_work(). Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@xxxxxxxxxxx> Reviewed-by: Sergey Shtylyov <s.shtylyov@xxxxxx> Reviewed-by: Simon Horman <horms@xxxxxxxxxx> --- Changes from v2: https://lore.kernel.org/netdev/20231019113308.1133944-1-yoshihiro.shimoda.uh@xxxxxxxxxxx/ - Add rescheduling if rtnl_trylock() fails and the netif is still running and update commit description for it. - Add Reviewed-by tags. Changes from v1: https://lore.kernel.org/all/20231017085341.813335-1-yoshihiro.shimoda.uh@xxxxxxxxxxx/ - Modify commit description. - Use goto in an error path. drivers/net/ethernet/renesas/ravb.h | 2 +- drivers/net/ethernet/renesas/ravb_main.c | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index e0f8276cffed..e9bb8ee3ba2d 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1081,7 +1081,7 @@ struct ravb_private { u32 cur_tx[NUM_TX_QUEUE]; u32 dirty_tx[NUM_TX_QUEUE]; struct napi_struct napi[NUM_RX_QUEUE]; - struct work_struct work; + struct delayed_work work; /* MII transceiver section. 
*/ struct mii_bus *mii_bus; /* MDIO bus control */ int link; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index c70cff80cc99..ca7db8a5b412 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1863,17 +1863,24 @@ static void ravb_tx_timeout(struct net_device *ndev, unsigned int txqueue) /* tx_errors count up */ ndev->stats.tx_errors++; - schedule_work(&priv->work); + schedule_delayed_work(&priv->work, 0); } static void ravb_tx_timeout_work(struct work_struct *work) { - struct ravb_private *priv = container_of(work, struct ravb_private, + struct delayed_work *dwork = to_delayed_work(work); + struct ravb_private *priv = container_of(dwork, struct ravb_private, work); const struct ravb_hw_info *info = priv->info; struct net_device *ndev = priv->ndev; int error; + if (!rtnl_trylock()) { + if (netif_running(ndev)) + schedule_delayed_work(&priv->work, msecs_to_jiffies(10)); + return; + } + netif_tx_stop_all_queues(ndev); /* Stop PTP Clock driver */ @@ -1907,7 +1914,7 @@ static void ravb_tx_timeout_work(struct work_struct *work) */ netdev_err(ndev, "%s: ravb_dmac_init() failed, error %d\n", __func__, error); - return; + goto out_unlock; } ravb_emac_init(ndev); @@ -1917,6 +1924,9 @@ static void ravb_tx_timeout_work(struct work_struct *work) ravb_ptp_init(ndev, priv->pdev); netif_tx_start_all_queues(ndev); + +out_unlock: + rtnl_unlock(); } /* Packet transmit function for Ethernet AVB */ @@ -2167,7 +2177,7 @@ static int ravb_close(struct net_device *ndev) of_phy_deregister_fixed_link(np); } - cancel_work_sync(&priv->work); + cancel_delayed_work_sync(&priv->work); if (info->multi_irqs) { free_irq(priv->tx_irqs[RAVB_NC], ndev); @@ -2687,7 +2697,7 @@ static int ravb_probe(struct platform_device *pdev) ndev->base_addr = res->start; spin_lock_init(&priv->lock); - INIT_WORK(&priv->work, ravb_tx_timeout_work); + INIT_DELAYED_WORK(&priv->work, ravb_tx_timeout_work); error = 
of_get_phy_mode(np, &priv->phy_interface); if (error && error != -ENODEV) -- 2.25.1