Re: [PATCH 1/2] net: dsa: RCU-protect dsa_ptr in struct net_device

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 9/10/24 06:03, 'A. Sverdlin' via BCM-KERNEL-FEEDBACK-LIST,PDL wrote:
From: Alexander Sverdlin <alexander.sverdlin@xxxxxxxxxxx>

There are multiple races between zeroing dsa_ptr in struct net_device (on
shutdown/remove) and asynchronous dereferences all over the net
code. A widespread pattern is as follows:

CPU0					CPU1
if (netdev_uses_dsa())
					dev->dsa_ptr = NULL;
         dev->dsa_ptr->...

One of the possible crashes:

Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010
CPU: 0 PID: 12 Comm: ksoftirqd/0 Tainted: G O 6.1.99+ #1
pc : lan9303_rcv
lr : lan9303_rcv
Call trace:
  lan9303_rcv
  dsa_switch_rcv
  __netif_receive_skb_list_core
  netif_receive_skb_list_internal
  napi_gro_receive
  fec_enet_rx_napi
  __napi_poll
  net_rx_action
...

RCU-protect dsa_ptr and use rcu_dereference() or rtnl_dereference()
depending on the calling context.

Rename netdev_uses_dsa() into the __netdev_uses_dsa_currently()
(assumes either the RCU or the RTNL lock is held) and
netdev_uses_dsa_currently() variants. The new names better reflect the
uselessness of the function's return value, which becomes outdated
right after the call.

Fixes: ee534378f005 ("net: dsa: fix panic when DSA master device unbinds on shutdown")
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Alexander Sverdlin <alexander.sverdlin@xxxxxxxxxxx>

Thanks for doing this work; just a few nits below. Note that this is likely to be difficult to backport to stable trees.

---
  drivers/net/dsa/mt7530.c                    |   3 +-
  drivers/net/dsa/ocelot/felix.c              |   3 +-
  drivers/net/dsa/qca/qca8k-8xxx.c            |   3 +-
  drivers/net/ethernet/broadcom/bcmsysport.c  |   8 +-
  drivers/net/ethernet/mediatek/airoha_eth.c  |   2 +-
  drivers/net/ethernet/mediatek/mtk_eth_soc.c |  22 +++--
  drivers/net/ethernet/mediatek/mtk_ppe.c     |  15 ++-
  include/linux/netdevice.h                   |   2 +-
  include/net/dsa.h                           |  36 +++++--
  include/net/dsa_stubs.h                     |   6 +-
  net/bridge/br_input.c                       |   2 +-
  net/core/dev.c                              |   3 +-
  net/core/flow_dissector.c                   |  19 ++--
  net/dsa/conduit.c                           |  66 ++++++++-----
  net/dsa/dsa.c                               |  19 ++--
  net/dsa/port.c                              |   3 +-
  net/dsa/tag.c                               |   3 +-
  net/dsa/tag.h                               |  19 ++--
  net/dsa/tag_8021q.c                         |  10 +-
  net/dsa/tag_brcm.c                          |   2 +-
  net/dsa/tag_dsa.c                           |   8 +-
  net/dsa/tag_qca.c                           |  10 +-
  net/dsa/tag_sja1105.c                       |  22 +++--
  net/dsa/user.c                              | 104 +++++++++++---------
  net/ethernet/eth.c                          |   2 +-
  25 files changed, 240 insertions(+), 152 deletions(-)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index ec18e68bf3a8..82d3f1786156 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -20,6 +20,7 @@
  #include <linux/reset.h>
  #include <linux/gpio/consumer.h>
  #include <linux/gpio/driver.h>
+#include <linux/rtnetlink.h>
  #include <net/dsa.h>
#include "mt7530.h"
@@ -3092,7 +3093,7 @@ mt753x_conduit_state_change(struct dsa_switch *ds,
  			    const struct net_device *conduit,
  			    bool operational)
  {
-	struct dsa_port *cpu_dp = conduit->dsa_ptr;
+	struct dsa_port *cpu_dp = rtnl_dereference(conduit->dsa_ptr);

An observation: only sparse is likely to recognize the __rcu annotation of net_device::dsa_ptr, so unfortunately there is still room for programmers to forget to use rtnl_dereference(). Hopefully this will be caught by CI when patches are submitted.

  	struct mt7530_priv *priv = ds->priv;
  	int val = 0;
  	u8 mask;
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 4a705f7333f4..f6bc0ff0c116 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -21,6 +21,7 @@
  #include <linux/of_net.h>
  #include <linux/pci.h>
  #include <linux/of.h>
+#include <linux/rtnetlink.h>
  #include <net/pkt_sched.h>
  #include <net/dsa.h>
  #include "felix.h"
@@ -57,7 +58,7 @@ static int felix_cpu_port_for_conduit(struct dsa_switch *ds,
  		return lag;
  	}
- cpu_dp = conduit->dsa_ptr;
+	cpu_dp = rtnl_dereference(conduit->dsa_ptr);
  	return cpu_dp->index;
  }
diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c
index f8d8c70642c4..10b4d7e9be2f 100644
--- a/drivers/net/dsa/qca/qca8k-8xxx.c
+++ b/drivers/net/dsa/qca/qca8k-8xxx.c
@@ -20,6 +20,7 @@
  #include <linux/gpio/consumer.h>
  #include <linux/etherdevice.h>
  #include <linux/dsa/tag_qca.h>
+#include <linux/rtnetlink.h>
#include "qca8k.h"
  #include "qca8k_leds.h"
@@ -1754,7 +1755,7 @@ static void
  qca8k_conduit_change(struct dsa_switch *ds, const struct net_device *conduit,
  		     bool operational)
  {
-	struct dsa_port *dp = conduit->dsa_ptr;
+	struct dsa_port *dp = rtnl_dereference(conduit->dsa_ptr);
  	struct qca8k_priv *priv = ds->priv;
/* Ethernet MIB/MDIO is only supported for CPU port 0 */
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index c9faa8540859..bd9bc081346d 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -145,7 +145,7 @@ static void bcm_sysport_set_rx_csum(struct net_device *dev,
  	 * sure we tell the RXCHK hardware to expect a 4-bytes Broadcom
  	 * tag after the Ethernet MAC Source Address.
  	 */
-	if (netdev_uses_dsa(dev))
+	if (__netdev_uses_dsa_currently(dev))

I appreciate the thought put into the function name, though I am not sure the _currently() suffix is warranted.

  		reg |= RXCHK_BRCM_TAG_EN;
  	else
  		reg &= ~RXCHK_BRCM_TAG_EN;
@@ -173,7 +173,7 @@ static void bcm_sysport_set_tx_csum(struct net_device *dev,
  	 * checksum to be computed correctly when using VLAN HW acceleration,
  	 * else it has no effect, so it can always be turned on.
  	 */
-	if (netdev_uses_dsa(dev))
+	if (__netdev_uses_dsa_currently(dev))
  		reg |= tdma_control_bit(priv, SW_BRCM_TAG);
  	else
  		reg &= ~tdma_control_bit(priv, SW_BRCM_TAG);
@@ -1950,7 +1950,7 @@ static inline void gib_set_pad_extension(struct bcm_sysport_priv *priv)
reg = gib_readl(priv, GIB_CONTROL);
  	/* Include Broadcom tag in pad extension and fix up IPG_LENGTH */
-	if (netdev_uses_dsa(priv->netdev)) {
+	if (__netdev_uses_dsa_currently(priv->netdev)) {
  		reg &= ~(GIB_PAD_EXTENSION_MASK << GIB_PAD_EXTENSION_SHIFT);
  		reg |= ENET_BRCM_TAG_LEN << GIB_PAD_EXTENSION_SHIFT;
  	}
@@ -2299,7 +2299,7 @@ static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
  	struct bcm_sysport_tx_ring *tx_ring;
  	unsigned int q, port;
- if (!netdev_uses_dsa(dev))
+	if (!__netdev_uses_dsa_currently(dev))
  		return netdev_pick_tx(dev, skb, NULL);
/* DSA tagging layer will have configured the correct queue */
diff --git a/drivers/net/ethernet/mediatek/airoha_eth.c b/drivers/net/ethernet/mediatek/airoha_eth.c
index 1c5b85a86df1..f7425d393b22 100644
--- a/drivers/net/ethernet/mediatek/airoha_eth.c
+++ b/drivers/net/ethernet/mediatek/airoha_eth.c
@@ -2255,7 +2255,7 @@ static int airoha_dev_open(struct net_device *dev)
  	if (err)
  		return err;
- if (netdev_uses_dsa(dev))
+	if (__netdev_uses_dsa_currently(dev))
  		airoha_fe_set(eth, REG_GDM_INGRESS_CFG(port->id),
  			      GDM_STAG_EN_MASK);
  	else
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 16ca427cf4c3..82a828349323 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -24,6 +24,7 @@
  #include <linux/pcs/pcs-mtk-lynxi.h>
  #include <linux/jhash.h>
  #include <linux/bitfield.h>
+#include <linux/rcupdate.h>
  #include <net/dsa.h>
  #include <net/dst_metadata.h>
  #include <net/page_pool/helpers.h>
@@ -1375,7 +1376,8 @@ static void mtk_tx_set_dma_desc_v2(struct net_device *dev, void *txd,
  		/* tx checksum offload */
  		if (info->csum)
  			data |= TX_DMA_CHKSUM_V2;
-		if (mtk_is_netsys_v3_or_greater(eth) && netdev_uses_dsa(dev))
+		if (mtk_is_netsys_v3_or_greater(eth) &&
+		    __netdev_uses_dsa_currently(dev))
  			data |= TX_DMA_SPTAG_V3;
  	}
  	WRITE_ONCE(desc->txd5, data);
@@ -2183,7 +2185,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
  		 * hardware treats the MTK special tag as a VLAN and untags it.
  		 */
  		if (mtk_is_netsys_v1(eth) && (trxd.rxd2 & RX_DMA_VTAG) &&
-		    netdev_uses_dsa(netdev)) {
+		    __netdev_uses_dsa_currently(netdev)) {
  			unsigned int port = RX_DMA_VPID(trxd.rxd3) & GENMASK(2, 0);
if (port < ARRAY_SIZE(eth->dsa_meta) &&
@@ -3304,7 +3306,7 @@ static void mtk_gdm_config(struct mtk_eth *eth, u32 id, u32 config)
val |= config; - if (eth->netdev[id] && netdev_uses_dsa(eth->netdev[id]))
+	if (eth->netdev[id] && __netdev_uses_dsa_currently(eth->netdev[id]))
  		val |= MTK_GDMA_SPECIAL_TAG;
mtk_w32(eth, val, MTK_GDMA_FWD_CFG(id));
@@ -3313,12 +3315,16 @@ static void mtk_gdm_config(struct mtk_eth *eth, u32 id, u32 config)
static bool mtk_uses_dsa(struct net_device *dev)
  {
+	bool ret = false;
  #if IS_ENABLED(CONFIG_NET_DSA)
-	return netdev_uses_dsa(dev) &&
-	       dev->dsa_ptr->tag_ops->proto == DSA_TAG_PROTO_MTK;
-#else
-	return false;
+	struct dsa_port *dp;
+
+	rcu_read_lock();
+	dp = rcu_dereference(dev->dsa_ptr);
+	ret = dp && dp->tag_ops->proto == DSA_TAG_PROTO_MTK;
+	rcu_read_unlock();

This pattern repeats in mtk_ppe.c; it could be factored into a common helper later.
--
Florian




[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux