Hi, Kory, On 12.12.2024 19:06, Kory Maincent wrote: > Introduce the description of a hwtstamp provider, mainly defined with a > the hwtstamp source and the phydev pointer. > > Add a hwtstamp provider description within the netdev structure to > allow saving the hwtstamp we want to use. This prepares for future > support of an ethtool netlink command to select the desired hwtstamp > provider. By default, the old API that does not support hwtstamp > selectability is used, meaning the hwtstamp provider pointer is unset. > > Signed-off-by: Kory Maincent <kory.maincent@xxxxxxxxxxx> I'm getting this error when doing suspend/resume on the Renesas RZ/G3S Smarc Module + RZ SMARC Carrier II board: [ 39.032969] ============================= [ 39.032983] WARNING: suspicious RCU usage [ 39.033000] 6.13.0-rc7-next-20250116-arm64-renesas-00002-g35245dfdc62c #7 Not tainted [ 39.033019] ----------------------------- [ 39.033033] drivers/net/phy/phy_device.c:2004 suspicious rcu_dereference_protected() usage! [ 39.033054] [ 39.033054] other info that might help us debug this: [ 39.033054] [ 39.033068] [ 39.033068] rcu_scheduler_active = 2, debug_locks = 1 [ 39.033084] 5 locks held by python3/174: [ 39.033100] #0: ffff00000ba8f3f0 (sb_writers#4){.+.+}-{0:0}, at: vfs_write+0x1b4/0x378 [ 39.033217] #1: ffff00000f828888 (&of->mutex#2){+.+.}-{4:4}, at: kernfs_fop_write_iter+0xe8/0x1a8 [ 39.033321] #2: ffff00000ae3f4d8 (kn->active#47){.+.+}-{0:0}, at: kernfs_fop_write_iter+0xf0/0x1a8 [ 39.033418] #3: ffff800081910958 (system_transition_mutex){+.+.}-{4:4}, at: pm_suspend+0x120/0x274 [ 39.033508] #4: ffff00000af8f8f8 (&dev->mutex){....}-{4:4}, at: device_suspend+0xf4/0x4dc [ 39.033597] [ 39.033597] stack backtrace: [ 39.033613] CPU: 0 UID: 0 PID: 174 Comm: python3 Not tainted 6.13.0-rc7-next-20250116-arm64-renesas-00002-g35245dfdc62c #7 [ 39.033623] Hardware name: Renesas SMARC EVK version 2 based on r9a08g045s33 (DT) [ 39.033628] Call trace: [ 39.033633] show_stack+0x14/0x1c (C) [ 39.033652] dump_stack_lvl+0xb4/0xc4 [ 39.033664] dump_stack+0x14/0x1c [ 39.033671] lockdep_rcu_suspicious+0x16c/0x22c [ 39.033682] phy_detach+0x160/0x190 [ 39.033694] phy_disconnect+0x40/0x54 [ 39.033703] ravb_close+0x6c/0x1cc [ 39.033714] ravb_suspend+0x48/0x120 [ 39.033721] dpm_run_callback+0x4c/0x14c [ 39.033731] device_suspend+0x11c/0x4dc [ 39.033740] dpm_suspend+0xdc/0x214 [ 39.033748] dpm_suspend_start+0x48/0x60 [ 39.033758] suspend_devices_and_enter+0x124/0x574 [ 39.033769] pm_suspend+0x1ac/0x274 [ 39.033778] state_store+0x88/0x124 [ 39.033788] kobj_attr_store+0x14/0x24 [ 39.033798] sysfs_kf_write+0x48/0x6c [ 39.033808] kernfs_fop_write_iter+0x118/0x1a8 [ 39.033817] vfs_write+0x27c/0x378 [ 39.033825] ksys_write+0x64/0xf4 [ 39.033833] __arm64_sys_write+0x18/0x20 [ 39.033841] invoke_syscall+0x44/0x104 [ 39.033852] el0_svc_common.constprop.0+0xb4/0xd4 [ 39.033862] do_el0_svc+0x18/0x20 [ 39.033870] el0_svc+0x3c/0xf0 [ 39.033880] el0t_64_sync_handler+0xc0/0xc4 [ 39.033888] el0t_64_sync+0x154/0x158 [ 39.041274] ravb 11c30000.ethernet eth0: Link is Down drivers/net/phy/phy_device.c:2004 is pointing to this commit. Haven't got the chance to investigate it, yet. It seems this commit is triggering it. Do you know if something needs to be changed now in the way the PHY is disconnected to avoid triggering this issue? Thank you, Claudiu > --- > > Change in v8: > - New patch > > Change in v10: > - Set hwtstamp in netdevice as a pointer for future use of rcu lock. > - Fix a nit in teh order of setting phydev pointer. > - Add a missing kdoc description. > > Change in v13: > - Remove an include from netdevices.h. > > Change in v16: > - Import the part of the patch 12 which belong to the hwtstamp provider > selectability of net core. > > Change in v18: > - Fix a doc NIT. > > Change in v20: > - Rework the hwtstamp provider design. Use hwtstamp source alongside > with phydev pointer instead. > --- > drivers/net/phy/phy_device.c | 10 ++++++++ > include/linux/net_tstamp.h | 29 +++++++++++++++++++++++ > include/linux/netdevice.h | 4 ++++ > include/uapi/linux/net_tstamp.h | 11 +++++++++ > net/core/dev_ioctl.c | 41 ++++++++++++++++++++++++++++++-- > net/core/timestamping.c | 52 +++++++++++++++++++++++++++++++++++++---- > 6 files changed, 140 insertions(+), 7 deletions(-) > > diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c > index b26bb33cd1d4..1a908af4175b 100644 > --- a/drivers/net/phy/phy_device.c > +++ b/drivers/net/phy/phy_device.c > @@ -32,6 +32,7 @@ > #include <linux/phy_link_topology.h> > #include <linux/pse-pd/pse.h> > #include <linux/property.h> > +#include <linux/ptp_clock_kernel.h> > #include <linux/rtnetlink.h> > #include <linux/sfp.h> > #include <linux/skbuff.h> > @@ -1998,6 +1999,15 @@ void phy_detach(struct phy_device *phydev) > > phy_suspend(phydev); > if (dev) { > + struct hwtstamp_provider *hwprov; > + > + hwprov = rtnl_dereference(dev->hwprov); > + /* Disable timestamp if it is the one selected */ > + if (hwprov && hwprov->phydev == phydev) { > + rcu_assign_pointer(dev->hwprov, NULL); > + kfree_rcu(hwprov, rcu_head); > + } > + > phydev->attached_dev->phydev = NULL; > phydev->attached_dev = NULL; > phy_link_topo_del_phy(dev, phydev); > diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h > index 662074b08c94..ff0758e88ea1 100644 > --- a/include/linux/net_tstamp.h > +++ b/include/linux/net_tstamp.h > @@ -19,6 +19,33 @@ enum hwtstamp_source { > HWTSTAMP_SOURCE_PHYLIB, > }; > > +/** > + * struct hwtstamp_provider_desc - hwtstamp provider description > + * > + * @index: index of the hwtstamp provider. > + * @qualifier: hwtstamp provider qualifier. > + */ > +struct hwtstamp_provider_desc { > + int index; > + enum hwtstamp_provider_qualifier qualifier; > +}; > + > +/** > + * struct hwtstamp_provider - hwtstamp provider object > + * > + * @rcu_head: RCU callback used to free the struct. > + * @source: source of the hwtstamp provider. > + * @phydev: pointer of the phydev source in case a PTP coming from phylib > + * @desc: hwtstamp provider description. > + */ > + > +struct hwtstamp_provider { > + struct rcu_head rcu_head; > + enum hwtstamp_source source; > + struct phy_device *phydev; > + struct hwtstamp_provider_desc desc; > +}; > + > /** > * struct kernel_hwtstamp_config - Kernel copy of struct hwtstamp_config > * > @@ -31,6 +58,7 @@ enum hwtstamp_source { > * copied the ioctl request back to user space > * @source: indication whether timestamps should come from the netdev or from > * an attached phylib PHY > + * @qualifier: qualifier of the hwtstamp provider > * > * Prefer using this structure for in-kernel processing of hardware > * timestamping configuration, over the inextensible struct hwtstamp_config > @@ -43,6 +71,7 @@ struct kernel_hwtstamp_config { > struct ifreq *ifr; > bool copied_to_user; > enum hwtstamp_source source; > + enum hwtstamp_provider_qualifier qualifier; > }; > > static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kernel_cfg, > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > index d917949bba03..2593019ad5b1 100644 > --- a/include/linux/netdevice.h > +++ b/include/linux/netdevice.h > @@ -82,6 +82,7 @@ struct xdp_metadata_ops; > struct xdp_md; > struct ethtool_netdev_state; > struct phy_link_topology; > +struct hwtstamp_provider; > > typedef u32 xdp_features_t; > > @@ -2045,6 +2046,7 @@ enum netdev_reg_state { > * > * @neighbours: List heads pointing to this device's neighbours' > * dev_list, one per address-family. > + * @hwprov: Tracks which PTP performs hardware packet time stamping. > * > * FIXME: cleanup struct net_device such that network protocol info > * moves out. > @@ -2457,6 +2459,8 @@ struct net_device { > > struct hlist_head neighbours[NEIGH_NR_TABLES]; > > + struct hwtstamp_provider __rcu *hwprov; > + > u8 priv[] ____cacheline_aligned > __counted_by(priv_len); > } ____cacheline_aligned; > diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h > index 858339d1c1c4..55b0ab51096c 100644 > --- a/include/uapi/linux/net_tstamp.h > +++ b/include/uapi/linux/net_tstamp.h > @@ -13,6 +13,17 @@ > #include <linux/types.h> > #include <linux/socket.h> /* for SO_TIMESTAMPING */ > > +/* > + * Possible type of hwtstamp provider. Mainly "precise" the default one > + * is for IEEE 1588 quality and "approx" is for NICs DMA point. > + */ > +enum hwtstamp_provider_qualifier { > + HWTSTAMP_PROVIDER_QUALIFIER_PRECISE, > + HWTSTAMP_PROVIDER_QUALIFIER_APPROX, > + > + HWTSTAMP_PROVIDER_QUALIFIER_CNT, > +}; > + > /* SO_TIMESTAMPING flags */ > enum { > SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), > diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c > index 1f09930fca26..087a57b7e4fa 100644 > --- a/net/core/dev_ioctl.c > +++ b/net/core/dev_ioctl.c > @@ -6,6 +6,7 @@ > #include <linux/rtnetlink.h> > #include <linux/net_tstamp.h> > #include <linux/phylib_stubs.h> > +#include <linux/ptp_clock_kernel.h> > #include <linux/wireless.h> > #include <linux/if_bridge.h> > #include <net/dsa_stubs.h> > @@ -269,6 +270,21 @@ static int dev_eth_ioctl(struct net_device *dev, > int dev_get_hwtstamp_phylib(struct net_device *dev, > struct kernel_hwtstamp_config *cfg) > { > + struct hwtstamp_provider *hwprov; > + > + hwprov = rtnl_dereference(dev->hwprov); > + if (hwprov) { > + cfg->qualifier = hwprov->desc.qualifier; > + if (hwprov->source == HWTSTAMP_SOURCE_PHYLIB && > + hwprov->phydev) > + return phy_hwtstamp_get(hwprov->phydev, cfg); > + > + if (hwprov->source == HWTSTAMP_SOURCE_NETDEV) > + return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg); > + > + return -EOPNOTSUPP; > + } > + > if (phy_is_default_hwtstamp(dev->phydev)) > return phy_hwtstamp_get(dev->phydev, cfg); > > @@ -324,11 +340,32 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, > struct netlink_ext_ack *extack) > { > const struct net_device_ops *ops = dev->netdev_ops; > - bool phy_ts = phy_is_default_hwtstamp(dev->phydev); > struct kernel_hwtstamp_config old_cfg = {}; > + struct hwtstamp_provider *hwprov; > + struct phy_device *phydev; > bool changed = false; > + bool phy_ts; > int err; > > + hwprov = rtnl_dereference(dev->hwprov); > + if (hwprov) { > + if (hwprov->source == HWTSTAMP_SOURCE_PHYLIB && > + hwprov->phydev) { > + phy_ts = true; > + phydev = hwprov->phydev; > + } else if (hwprov->source == HWTSTAMP_SOURCE_NETDEV) { > + phy_ts = false; > + } else { > + return -EOPNOTSUPP; > + } > + > + cfg->qualifier = hwprov->desc.qualifier; > + } else { > + phy_ts = phy_is_default_hwtstamp(dev->phydev); > + if (phy_ts) > + phydev = dev->phydev; > + } > + > cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV; > > if (phy_ts && dev->see_all_hwtstamp_requests) { > @@ -350,7 +387,7 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, > changed = kernel_hwtstamp_config_changed(&old_cfg, cfg); > > if (phy_ts) { > - err = phy_hwtstamp_set(dev->phydev, cfg, extack); > + err = phy_hwtstamp_set(phydev, cfg, extack); > if (err) { > if (changed) > ops->ndo_hwtstamp_set(dev, &old_cfg, NULL); > diff --git a/net/core/timestamping.c b/net/core/timestamping.c > index 3717fb152ecc..a50a7ef49ae8 100644 > --- a/net/core/timestamping.c > +++ b/net/core/timestamping.c > @@ -9,6 +9,7 @@ > #include <linux/ptp_classify.h> > #include <linux/skbuff.h> > #include <linux/export.h> > +#include <linux/ptp_clock_kernel.h> > > static unsigned int classify(const struct sk_buff *skb) > { > @@ -21,19 +22,39 @@ static unsigned int classify(const struct sk_buff *skb) > > void skb_clone_tx_timestamp(struct sk_buff *skb) > { > + struct hwtstamp_provider *hwprov; > struct mii_timestamper *mii_ts; > + struct phy_device *phydev; > struct sk_buff *clone; > unsigned int type; > > - if (!skb->sk || !skb->dev || > - !phy_is_default_hwtstamp(skb->dev->phydev)) > + if (!skb->sk || !skb->dev) > return; > > + rcu_read_lock(); > + hwprov = rcu_dereference(skb->dev->hwprov); > + if (hwprov) { > + if (hwprov->source != HWTSTAMP_SOURCE_PHYLIB || > + !hwprov->phydev) { > + rcu_read_unlock(); > + return; > + } > + > + phydev = hwprov->phydev; > + } else { > + phydev = skb->dev->phydev; > + if (!phy_is_default_hwtstamp(phydev)) { > + rcu_read_unlock(); > + return; > + } > + } > + rcu_read_unlock(); > + > type = classify(skb); > if (type == PTP_CLASS_NONE) > return; > > - mii_ts = skb->dev->phydev->mii_ts; > + mii_ts = phydev->mii_ts; > if (likely(mii_ts->txtstamp)) { > clone = skb_clone_sk(skb); > if (!clone) > @@ -45,12 +66,33 @@ EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp); > > bool skb_defer_rx_timestamp(struct sk_buff *skb) > { > + struct hwtstamp_provider *hwprov; > struct mii_timestamper *mii_ts; > + struct phy_device *phydev; > unsigned int type; > > - if (!skb->dev || !phy_is_default_hwtstamp(skb->dev->phydev)) > + if (!skb->dev) > return false; > > + rcu_read_lock(); > + hwprov = rcu_dereference(skb->dev->hwprov); > + if (hwprov) { > + if (hwprov->source != HWTSTAMP_SOURCE_PHYLIB || > + !hwprov->phydev) { > + rcu_read_unlock(); > + return false; > + } > + > + phydev = hwprov->phydev; > + } else { > + phydev = skb->dev->phydev; > + if (!phy_is_default_hwtstamp(phydev)) { > + rcu_read_unlock(); > + return false; > + } > + } > + rcu_read_unlock(); > + > if (skb_headroom(skb) < ETH_HLEN) > return false; > > @@ -63,7 +105,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) > if (type == PTP_CLASS_NONE) > return false; > > - mii_ts = skb->dev->phydev->mii_ts; > + mii_ts = phydev->mii_ts; > if (likely(mii_ts->rxtstamp)) > return mii_ts->rxtstamp(mii_ts, skb, type); > >