6.9-stable review patch. If anyone has any objections, please let me know. ------------------ From: Shay Drory <shayd@xxxxxxxxxx> [ Upstream commit 3c453e8cc672de1f9c662948dba43176bc68d7f0 ] The cited patch change register devlink flow, and neglect to reflect the changes for peer devlink set logic. Peer devlink set is triggering a call trace if done after devl_register.[1] Hence, align peer devlink set logic with register devlink flow. [1] WARNING: CPU: 4 PID: 3394 at net/devlink/core.c:155 devlink_rel_nested_in_add+0x177/0x180 CPU: 4 PID: 3394 Comm: kworker/u40:1 Not tainted 6.9.0-rc4_for_linust_min_debug_2024_04_16_14_08 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5_vhca_event0 mlx5_vhca_state_work_handler [mlx5_core] RIP: 0010:devlink_rel_nested_in_add+0x177/0x180 Call Trace: <TASK> ? __warn+0x78/0x120 ? devlink_rel_nested_in_add+0x177/0x180 ? report_bug+0x16d/0x180 ? handle_bug+0x3c/0x60 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 ? devlink_port_init+0x30/0x30 ? devlink_port_type_clear+0x50/0x50 ? devlink_rel_nested_in_add+0x177/0x180 ? devlink_rel_nested_in_add+0xdd/0x180 mlx5_sf_mdev_event+0x74/0xb0 [mlx5_core] notifier_call_chain+0x35/0xb0 blocking_notifier_call_chain+0x3d/0x60 mlx5_blocking_notifier_call_chain+0x22/0x30 [mlx5_core] mlx5_sf_dev_probe+0x185/0x3e0 [mlx5_core] auxiliary_bus_probe+0x38/0x80 ? driver_sysfs_add+0x51/0x80 really_probe+0xc5/0x3a0 ? driver_probe_device+0x90/0x90 __driver_probe_device+0x80/0x160 driver_probe_device+0x1e/0x90 __device_attach_driver+0x7d/0x100 bus_for_each_drv+0x80/0xd0 __device_attach+0xbc/0x1f0 bus_probe_device+0x86/0xa0 device_add+0x64f/0x860 __auxiliary_device_add+0x3b/0xa0 mlx5_sf_dev_add+0x139/0x330 [mlx5_core] mlx5_sf_dev_state_change_handler+0x1e4/0x250 [mlx5_core] notifier_call_chain+0x35/0xb0 blocking_notifier_call_chain+0x3d/0x60 mlx5_vhca_state_work_handler+0x151/0x200 [mlx5_core] process_one_work+0x13f/0x2e0 worker_thread+0x2bd/0x3c0 ? rescuer_thread+0x410/0x410 kthread+0xc4/0xf0 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x2d/0x50 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork_asm+0x11/0x20 </TASK> Fixes: bf729988303a ("net/mlx5: Restore mistakenly dropped parts in register devlink flow") Fixes: c6e77aa9dd82 ("net/mlx5: Register devlink first under devlink lock") Signed-off-by: Shay Drory <shayd@xxxxxxxxxx> Reviewed-by: Moshe Shemesh <moshe@xxxxxxxxxx> Signed-off-by: Tariq Toukan <tariqt@xxxxxxxxxx> Reviewed-by: Simon Horman <horms@xxxxxxxxxx> Link: https://lore.kernel.org/r/20240509112951.590184-3-tariqt@xxxxxxxxxx Signed-off-by: Jakub Kicinski <kuba@xxxxxxxxxx> Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> --- .../net/ethernet/mellanox/mlx5/core/main.c | 14 +++++--------- .../mellanox/mlx5/core/sf/dev/driver.c | 19 ++++++++----------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 331ce47f51a17..6574c145dc1e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1680,6 +1680,8 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev) struct devlink *devlink = priv_to_devlink(dev); int err; + devl_lock(devlink); + devl_register(devlink); dev->state = MLX5_DEVICE_STATE_UP; err = mlx5_function_enable(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); if (err) { @@ -1693,27 +1695,21 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev) goto query_hca_caps_err; } - devl_lock(devlink); - devl_register(devlink); - err = mlx5_devlink_params_register(priv_to_devlink(dev)); if (err) { mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err); - goto params_reg_err; + goto query_hca_caps_err; } devl_unlock(devlink); return 0; -params_reg_err: - devl_unregister(devlink); - devl_unlock(devlink); query_hca_caps_err: - devl_unregister(devlink); - devl_unlock(devlink); mlx5_function_disable(dev, true); out: dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + devl_unregister(devlink); + devl_unlock(devlink); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index 7ebe712808275..b2986175d9afe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -60,6 +60,13 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia goto remap_err; } + /* Peer devlink logic expects to work on unregistered devlink instance. */ + err = mlx5_core_peer_devlink_set(sf_dev, devlink); + if (err) { + mlx5_core_warn(mdev, "mlx5_core_peer_devlink_set err=%d\n", err); + goto peer_devlink_set_err; + } + if (MLX5_ESWITCH_MANAGER(sf_dev->parent_mdev)) err = mlx5_init_one_light(mdev); else @@ -69,20 +76,10 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia goto init_one_err; } - err = mlx5_core_peer_devlink_set(sf_dev, devlink); - if (err) { - mlx5_core_warn(mdev, "mlx5_core_peer_devlink_set err=%d\n", err); - goto peer_devlink_set_err; - } - return 0; -peer_devlink_set_err: - if (mlx5_dev_is_lightweight(sf_dev->mdev)) - mlx5_uninit_one_light(sf_dev->mdev); - else - mlx5_uninit_one(sf_dev->mdev); init_one_err: +peer_devlink_set_err: iounmap(mdev->iseg); remap_err: mlx5_mdev_uninit(mdev); -- 2.43.0