Hi, On 08.01.2021 02:16, David Collins wrote: > The final step in regulator_register() is to call > regulator_resolve_supply() for each registered regulator > (including the one in the process of being registered). The > regulator_resolve_supply() function first checks if rdev->supply > is NULL, then it performs various steps to try to find the supply. > If successful, rdev->supply is set inside of set_supply(). > > This procedure can encounter a race condition if two concurrent > tasks call regulator_register() near to each other on separate CPUs > and one of the regulators has rdev->supply_name specified. There > is currently nothing guaranteeing atomicity between the rdev->supply > check and set steps. Thus, both tasks can observe rdev->supply==NULL > in their regulator_resolve_supply() calls. This then results in > both creating a struct regulator for the supply. One ends up > actually stored in rdev->supply and the other is lost (though still > present in the supply's consumer_list). > > Here is a kernel log snippet showing the issue: > > [ 12.421768] gpu_cc_gx_gdsc: supplied by pm8350_s5_level > [ 12.425854] gpu_cc_gx_gdsc: supplied by pm8350_s5_level > [ 12.429064] debugfs: Directory 'regulator.4-SUPPLY' with parent > '17a00000.rsc:rpmh-regulator-gfxlvl-pm8350_s5_level' > already present! > > Avoid this race condition by holding the rdev->mutex lock inside > of regulator_resolve_supply() while checking and setting > rdev->supply. > > Signed-off-by: David Collins <collinsd@xxxxxxxxxxxxxx> This patch landed in linux next-20210112 as commit eaa7995c529b ("regulator: core: avoid regulator_resolve_supply() race condition"). I found that it triggers a following lockdep warning during the DWC3 driver registration on some Exynos based boards (this log is from Samsung Exynos5420-based Peach-Pit board): ====================================================== WARNING: possible circular locking dependency detected 5.11.0-rc1-00008-geaa7995c529b #10095 Not tainted ------------------------------------------------------ swapper/0/1 is trying to acquire lock: c12e1b80 (regulator_list_mutex){+.+.}-{3:3}, at: regulator_lock_dependent+0x4c/0x2b0 but task is already holding lock: df7190c0 (regulator_ww_class_mutex){+.+.}-{3:3}, at: regulator_resolve_supply+0x44/0x318 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (regulator_ww_class_mutex){+.+.}-{3:3}: ww_mutex_lock+0x48/0x88 regulator_lock_recursive+0x84/0x1f4 regulator_lock_dependent+0x184/0x2b0 regulator_enable+0x30/0xe4 dwc3_exynos_probe+0x17c/0x2c0 platform_probe+0x80/0xc0 really_probe+0x1c4/0x4e4 driver_probe_device+0x78/0x1d8 device_driver_attach+0x58/0x60 __driver_attach+0xfc/0x160 bus_for_each_dev+0x6c/0xb8 bus_add_driver+0x170/0x20c driver_register+0x78/0x10c do_one_initcall+0x88/0x438 kernel_init_freeable+0x18c/0x1dc kernel_init+0x8/0x118 ret_from_fork+0x14/0x38 0x0 -> #1 (regulator_ww_class_acquire){+.+.}-{0:0}: regulator_enable+0x30/0xe4 dwc3_exynos_probe+0x17c/0x2c0 platform_probe+0x80/0xc0 really_probe+0x1c4/0x4e4 driver_probe_device+0x78/0x1d8 device_driver_attach+0x58/0x60 __driver_attach+0xfc/0x160 bus_for_each_dev+0x6c/0xb8 bus_add_driver+0x170/0x20c driver_register+0x78/0x10c do_one_initcall+0x88/0x438 kernel_init_freeable+0x18c/0x1dc kernel_init+0x8/0x118 ret_from_fork+0x14/0x38 0x0 -> #0 (regulator_list_mutex){+.+.}-{3:3}: lock_acquire+0x2e4/0x5dc __mutex_lock+0xa4/0xb60 mutex_lock_nested+0x1c/0x24 regulator_lock_dependent+0x4c/0x2b0 regulator_enable+0x30/0xe4 regulator_resolve_supply+0x1cc/0x318 regulator_register_resolve_supply+0x14/0x78 class_for_each_device+0x68/0xe8 regulator_register+0xa2c/0xc9c devm_regulator_register+0x40/0x70 tps65090_regulator_probe+0x150/0x648 platform_probe+0x80/0xc0 really_probe+0x1c4/0x4e4 driver_probe_device+0x78/0x1d8 bus_for_each_drv+0x78/0xbc __device_attach+0xe8/0x180 bus_probe_device+0x88/0x90 device_add+0x4c4/0x7e8 platform_device_add+0x120/0x25c mfd_add_devices+0x580/0x60c tps65090_i2c_probe+0xb8/0x184 i2c_device_probe+0x234/0x2a4 really_probe+0x1c4/0x4e4 driver_probe_device+0x78/0x1d8 bus_for_each_drv+0x78/0xbc __device_attach+0xe8/0x180 bus_probe_device+0x88/0x90 device_add+0x4c4/0x7e8 i2c_new_client_device+0x15c/0x27c of_i2c_register_devices+0x114/0x184 i2c_register_adapter+0x1d8/0x6dc ec_i2c_probe+0xc8/0x124 platform_probe+0x80/0xc0 really_probe+0x1c4/0x4e4 driver_probe_device+0x78/0x1d8 bus_for_each_drv+0x78/0xbc __device_attach+0xe8/0x180 bus_probe_device+0x88/0x90 device_add+0x4c4/0x7e8 of_platform_device_create_pdata+0x90/0xc8 of_platform_bus_create+0x1a0/0x4ec of_platform_populate+0x88/0x120 devm_of_platform_populate+0x40/0x80 cros_ec_register+0x174/0x308 cros_ec_spi_probe+0x16c/0x1ec spi_probe+0x88/0xac really_probe+0x1c4/0x4e4 driver_probe_device+0x78/0x1d8 device_driver_attach+0x58/0x60 __driver_attach+0xfc/0x160 bus_for_each_dev+0x6c/0xb8 bus_add_driver+0x170/0x20c driver_register+0x78/0x10c do_one_initcall+0x88/0x438 kernel_init_freeable+0x18c/0x1dc kernel_init+0x8/0x118 ret_from_fork+0x14/0x38 0x0 other info that might help us debug this: Chain exists of: regulator_list_mutex --> regulator_ww_class_acquire --> regulator_ww_class_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(regulator_ww_class_mutex); lock(regulator_ww_class_acquire); lock(regulator_ww_class_mutex); lock(regulator_list_mutex); *** DEADLOCK *** 5 locks held by swapper/0/1: #0: dfb6e4c8 (&dev->mutex){....}-{3:3}, at: device_driver_attach+0x18/0x60 #1: c1fedcd8 (&dev->mutex){....}-{3:3}, at: __device_attach+0x34/0x180 #2: df53a4e8 (&dev->mutex){....}-{3:3}, at: __device_attach+0x34/0x180 #3: df5224d8 (&dev->mutex){....}-{3:3}, at: __device_attach+0x34/0x180 #4: df7190c0 (regulator_ww_class_mutex){+.+.}-{3:3}, at: regulator_resolve_supply+0x44/0x318 stack backtrace: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.11.0-rc1-00008-geaa7995c529b #10095 Hardware name: Samsung Exynos (Flattened Device Tree) [<c01116e8>] (unwind_backtrace) from [<c010cf58>] (show_stack+0x10/0x14) [<c010cf58>] (show_stack) from [<c0b38ffc>] (dump_stack+0xa4/0xc4) [<c0b38ffc>] (dump_stack) from [<c0193458>] (check_noncircular+0x14c/0x164) [<c0193458>] (check_noncircular) from [<c0196b90>] (__lock_acquire+0x1830/0x31cc) [<c0196b90>] (__lock_acquire) from [<c01991e4>] (lock_acquire+0x2e4/0x5dc) [<c01991e4>] (lock_acquire) from [<c0b4043c>] (__mutex_lock+0xa4/0xb60) [<c0b4043c>] (__mutex_lock) from [<c0b40f14>] (mutex_lock_nested+0x1c/0x24) [<c0b40f14>] (mutex_lock_nested) from [<c05ccd94>] (regulator_lock_dependent+0x4c/0x2b0) [<c05ccd94>] (regulator_lock_dependent) from [<c05d220c>] (regulator_enable+0x30/0xe4) [<c05d220c>] (regulator_enable) from [<c05d248c>] (regulator_resolve_supply+0x1cc/0x318) [<c05d248c>] (regulator_resolve_supply) from [<c05d2974>] (regulator_register_resolve_supply+0x14/0x78) [<c05d2974>] (regulator_register_resolve_supply) from [<c06a3000>] (class_for_each_device+0x68/0xe8) [<c06a3000>] (class_for_each_device) from [<c05d3e20>] (regulator_register+0xa2c/0xc9c) [<c05d3e20>] (regulator_register) from [<c05d5c70>] (devm_regulator_register+0x40/0x70) [<c05d5c70>] (devm_regulator_register) from [<c05dea58>] (tps65090_regulator_probe+0x150/0x648) [<c05dea58>] (tps65090_regulator_probe) from [<c06a3fe8>] (platform_probe+0x80/0xc0) [<c06a3fe8>] (platform_probe) from [<c06a1114>] (really_probe+0x1c4/0x4e4) [<c06a1114>] (really_probe) from [<c06a14ac>] (driver_probe_device+0x78/0x1d8) [<c06a14ac>] (driver_probe_device) from [<c069f1a4>] (bus_for_each_drv+0x78/0xbc) [<c069f1a4>] (bus_for_each_drv) from [<c06a0eb0>] (__device_attach+0xe8/0x180) [<c06a0eb0>] (__device_attach) from [<c069ff50>] (bus_probe_device+0x88/0x90) [<c069ff50>] (bus_probe_device) from [<c069dbac>] (device_add+0x4c4/0x7e8) [<c069dbac>] (device_add) from [<c06a3bac>] (platform_device_add+0x120/0x25c) [<c06a3bac>] (platform_device_add) from [<c06d5c7c>] (mfd_add_devices+0x580/0x60c) [<c06d5c7c>] (mfd_add_devices) from [<c06d80e8>] (tps65090_i2c_probe+0xb8/0x184) [<c06d80e8>] (tps65090_i2c_probe) from [<c0822520>] (i2c_device_probe+0x234/0x2a4) [<c0822520>] (i2c_device_probe) from [<c06a1114>] (really_probe+0x1c4/0x4e4) [<c06a1114>] (really_probe) from [<c06a14ac>] (driver_probe_device+0x78/0x1d8) [<c06a14ac>] (driver_probe_device) from [<c069f1a4>] (bus_for_each_drv+0x78/0xbc) [<c069f1a4>] (bus_for_each_drv) from [<c06a0eb0>] (__device_attach+0xe8/0x180) [<c06a0eb0>] (__device_attach) from [<c069ff50>] (bus_probe_device+0x88/0x90) [<c069ff50>] (bus_probe_device) from [<c069dbac>] (device_add+0x4c4/0x7e8) [<c069dbac>] (device_add) from [<c0824aec>] (i2c_new_client_device+0x15c/0x27c) [<c0824aec>] (i2c_new_client_device) from [<c08285e0>] (of_i2c_register_devices+0x114/0x184) [<c08285e0>] (of_i2c_register_devices) from [<c08254b8>] (i2c_register_adapter+0x1d8/0x6dc) [<c08254b8>] (i2c_register_adapter) from [<c082dd1c>] (ec_i2c_probe+0xc8/0x124) [<c082dd1c>] (ec_i2c_probe) from [<c06a3fe8>] (platform_probe+0x80/0xc0) [<c06a3fe8>] (platform_probe) from [<c06a1114>] (really_probe+0x1c4/0x4e4) [<c06a1114>] (really_probe) from [<c06a14ac>] (driver_probe_device+0x78/0x1d8) [<c06a14ac>] (driver_probe_device) from [<c069f1a4>] (bus_for_each_drv+0x78/0xbc) [<c069f1a4>] (bus_for_each_drv) from [<c06a0eb0>] (__device_attach+0xe8/0x180) [<c06a0eb0>] (__device_attach) from [<c069ff50>] (bus_probe_device+0x88/0x90) [<c069ff50>] (bus_probe_device) from [<c069dbac>] (device_add+0x4c4/0x7e8) [<c069dbac>] (device_add) from [<c08b140c>] (of_platform_device_create_pdata+0x90/0xc8) [<c08b140c>] (of_platform_device_create_pdata) from [<c08b15f0>] (of_platform_bus_create+0x1a0/0x4ec) [<c08b15f0>] (of_platform_bus_create) from [<c08b1af0>] (of_platform_populate+0x88/0x120) [<c08b1af0>] (of_platform_populate) from [<c08b1bdc>] (devm_of_platform_populate+0x40/0x80) [<c08b1bdc>] (devm_of_platform_populate) from [<c08b72fc>] (cros_ec_register+0x174/0x308) [<c08b72fc>] (cros_ec_register) from [<c08b868c>] (cros_ec_spi_probe+0x16c/0x1ec) [<c08b868c>] (cros_ec_spi_probe) from [<c071b2f4>] (spi_probe+0x88/0xac) [<c071b2f4>] (spi_probe) from [<c06a1114>] (really_probe+0x1c4/0x4e4) [<c06a1114>] (really_probe) from [<c06a14ac>] (driver_probe_device+0x78/0x1d8) [<c06a14ac>] (driver_probe_device) from [<c06a19c4>] (device_driver_attach+0x58/0x60) [<c06a19c4>] (device_driver_attach) from [<c06a1ac8>] (__driver_attach+0xfc/0x160) [<c06a1ac8>] (__driver_attach) from [<c069f0cc>] (bus_for_each_dev+0x6c/0xb8) [<c069f0cc>] (bus_for_each_dev) from [<c06a0204>] (bus_add_driver+0x170/0x20c) [<c06a0204>] (bus_add_driver) from [<c06a2968>] (driver_register+0x78/0x10c) [<c06a2968>] (driver_register) from [<c0102428>] (do_one_initcall+0x88/0x438) [<c0102428>] (do_one_initcall) from [<c1101104>] (kernel_init_freeable+0x18c/0x1dc) [<c1101104>] (kernel_init_freeable) from [<c0b3c65c>] (kernel_init+0x8/0x118) [<c0b3c65c>] (kernel_init) from [<c010011c>] (ret_from_fork+0x14/0x38) Exception stack(0xc1ce3fb0 to 0xc1ce3ff8) 3fa0: 00000000 00000000 00000000 00000000 3fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 3fe0: 00000000 00000000 00000000 00000000 00000013 00000000 I didn't analyze it yet if this warning is really an issue or just a false positive. If you have any hints or comments let me know. > --- > drivers/regulator/core.c | 39 ++++++++++++++++++++++++++++----------- > 1 file changed, 28 insertions(+), 11 deletions(-) > > diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c > index fee9241..3ae5ccd 100644 > --- a/drivers/regulator/core.c > +++ b/drivers/regulator/core.c > @@ -1813,23 +1813,34 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) > { > struct regulator_dev *r; > struct device *dev = rdev->dev.parent; > - int ret; > + int ret = 0; > > /* No supply to resolve? */ > if (!rdev->supply_name) > return 0; > > - /* Supply already resolved? */ > + /* Supply already resolved? (fast-path without locking contention) */ > if (rdev->supply) > return 0; > > + /* > + * Recheck rdev->supply with rdev->mutex lock held to avoid a race > + * between rdev->supply null check and setting rdev->supply in > + * set_supply() from concurrent tasks. > + */ > + regulator_lock(rdev); > + > + /* Supply just resolved by a concurrent task? */ > + if (rdev->supply) > + goto out; > + > r = regulator_dev_lookup(dev, rdev->supply_name); > if (IS_ERR(r)) { > ret = PTR_ERR(r); > > /* Did the lookup explicitly defer for us? */ > if (ret == -EPROBE_DEFER) > - return ret; > + goto out; > > if (have_full_constraints()) { > r = dummy_regulator_rdev; > @@ -1837,15 +1848,18 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) > } else { > dev_err(dev, "Failed to resolve %s-supply for %s\n", > rdev->supply_name, rdev->desc->name); > - return -EPROBE_DEFER; > + ret = -EPROBE_DEFER; > + goto out; > } > } > > if (r == rdev) { > dev_err(dev, "Supply for %s (%s) resolved to itself\n", > rdev->desc->name, rdev->supply_name); > - if (!have_full_constraints()) > - return -EINVAL; > + if (!have_full_constraints()) { > + ret = -EINVAL; > + goto out; > + } > r = dummy_regulator_rdev; > get_device(&r->dev); > } > @@ -1859,7 +1873,8 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) > if (r->dev.parent && r->dev.parent != rdev->dev.parent) { > if (!device_is_bound(r->dev.parent)) { > put_device(&r->dev); > - return -EPROBE_DEFER; > + ret = -EPROBE_DEFER; > + goto out; > } > } > > @@ -1867,13 +1882,13 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) > ret = regulator_resolve_supply(r); > if (ret < 0) { > put_device(&r->dev); > - return ret; > + goto out; > } > > ret = set_supply(rdev, r); > if (ret < 0) { > put_device(&r->dev); > - return ret; > + goto out; > } > > /* > @@ -1886,11 +1901,13 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) > if (ret < 0) { > _regulator_put(rdev->supply); > rdev->supply = NULL; > - return ret; > + goto out; > } > } > > - return 0; > +out: > + regulator_unlock(rdev); > + return ret; > } > > /* Internal regulator request function */ Best regards -- Marek Szyprowski, PhD Samsung R&D Institute Poland