Re: [PATCH] regulator: core: avoid regulator_resolve_supply() race condition

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

On 08.01.2021 02:16, David Collins wrote:
> The final step in regulator_register() is to call
> regulator_resolve_supply() for each registered regulator
> (including the one in the process of being registered).  The
> regulator_resolve_supply() function first checks if rdev->supply
> is NULL, then it performs various steps to try to find the supply.
> If successful, rdev->supply is set inside of set_supply().
>
> This procedure can encounter a race condition if two concurrent
> tasks call regulator_register() near to each other on separate CPUs
> and one of the regulators has rdev->supply_name specified.  There
> is currently nothing guaranteeing atomicity between the rdev->supply
> check and set steps.  Thus, both tasks can observe rdev->supply==NULL
> in their regulator_resolve_supply() calls.  This then results in
> both creating a struct regulator for the supply.  One ends up
> actually stored in rdev->supply and the other is lost (though still
> present in the supply's consumer_list).
>
> Here is a kernel log snippet showing the issue:
>
> [   12.421768] gpu_cc_gx_gdsc: supplied by pm8350_s5_level
> [   12.425854] gpu_cc_gx_gdsc: supplied by pm8350_s5_level
> [   12.429064] debugfs: Directory 'regulator.4-SUPPLY' with parent
>                 '17a00000.rsc:rpmh-regulator-gfxlvl-pm8350_s5_level'
>                 already present!
>
> Avoid this race condition by holding the rdev->mutex lock inside
> of regulator_resolve_supply() while checking and setting
> rdev->supply.
>
> Signed-off-by: David Collins <collinsd@xxxxxxxxxxxxxx>

This patch landed in linux next-20210112 as commit eaa7995c529b 
("regulator: core: avoid regulator_resolve_supply() race condition"). I 
found that it triggers a following lockdep warning during the DWC3 
driver registration on some Exynos based boards (this log is from 
Samsung Exynos5420-based Peach-Pit board):

======================================================
WARNING: possible circular locking dependency detected
5.11.0-rc1-00008-geaa7995c529b #10095 Not tainted
------------------------------------------------------
swapper/0/1 is trying to acquire lock:
c12e1b80 (regulator_list_mutex){+.+.}-{3:3}, at: 
regulator_lock_dependent+0x4c/0x2b0

but task is already holding lock:
df7190c0 (regulator_ww_class_mutex){+.+.}-{3:3}, at: 
regulator_resolve_supply+0x44/0x318

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #2 (regulator_ww_class_mutex){+.+.}-{3:3}:
        ww_mutex_lock+0x48/0x88
        regulator_lock_recursive+0x84/0x1f4
        regulator_lock_dependent+0x184/0x2b0
        regulator_enable+0x30/0xe4
        dwc3_exynos_probe+0x17c/0x2c0
        platform_probe+0x80/0xc0
        really_probe+0x1c4/0x4e4
        driver_probe_device+0x78/0x1d8
        device_driver_attach+0x58/0x60
        __driver_attach+0xfc/0x160
        bus_for_each_dev+0x6c/0xb8
        bus_add_driver+0x170/0x20c
        driver_register+0x78/0x10c
        do_one_initcall+0x88/0x438
        kernel_init_freeable+0x18c/0x1dc
        kernel_init+0x8/0x118
        ret_from_fork+0x14/0x38
        0x0

-> #1 (regulator_ww_class_acquire){+.+.}-{0:0}:
        regulator_enable+0x30/0xe4
        dwc3_exynos_probe+0x17c/0x2c0
        platform_probe+0x80/0xc0
        really_probe+0x1c4/0x4e4
        driver_probe_device+0x78/0x1d8
        device_driver_attach+0x58/0x60
        __driver_attach+0xfc/0x160
        bus_for_each_dev+0x6c/0xb8
        bus_add_driver+0x170/0x20c
        driver_register+0x78/0x10c
        do_one_initcall+0x88/0x438
        kernel_init_freeable+0x18c/0x1dc
        kernel_init+0x8/0x118
        ret_from_fork+0x14/0x38
        0x0

-> #0 (regulator_list_mutex){+.+.}-{3:3}:
        lock_acquire+0x2e4/0x5dc
        __mutex_lock+0xa4/0xb60
        mutex_lock_nested+0x1c/0x24
        regulator_lock_dependent+0x4c/0x2b0
        regulator_enable+0x30/0xe4
        regulator_resolve_supply+0x1cc/0x318
        regulator_register_resolve_supply+0x14/0x78
        class_for_each_device+0x68/0xe8
        regulator_register+0xa2c/0xc9c
        devm_regulator_register+0x40/0x70
        tps65090_regulator_probe+0x150/0x648
        platform_probe+0x80/0xc0
        really_probe+0x1c4/0x4e4
        driver_probe_device+0x78/0x1d8
        bus_for_each_drv+0x78/0xbc
        __device_attach+0xe8/0x180
        bus_probe_device+0x88/0x90
        device_add+0x4c4/0x7e8
        platform_device_add+0x120/0x25c
        mfd_add_devices+0x580/0x60c
        tps65090_i2c_probe+0xb8/0x184
        i2c_device_probe+0x234/0x2a4
        really_probe+0x1c4/0x4e4
        driver_probe_device+0x78/0x1d8
        bus_for_each_drv+0x78/0xbc
        __device_attach+0xe8/0x180
        bus_probe_device+0x88/0x90
        device_add+0x4c4/0x7e8
        i2c_new_client_device+0x15c/0x27c
        of_i2c_register_devices+0x114/0x184
        i2c_register_adapter+0x1d8/0x6dc
        ec_i2c_probe+0xc8/0x124
        platform_probe+0x80/0xc0
        really_probe+0x1c4/0x4e4
        driver_probe_device+0x78/0x1d8
        bus_for_each_drv+0x78/0xbc
        __device_attach+0xe8/0x180
        bus_probe_device+0x88/0x90
        device_add+0x4c4/0x7e8
        of_platform_device_create_pdata+0x90/0xc8
        of_platform_bus_create+0x1a0/0x4ec
        of_platform_populate+0x88/0x120
        devm_of_platform_populate+0x40/0x80
        cros_ec_register+0x174/0x308
        cros_ec_spi_probe+0x16c/0x1ec
        spi_probe+0x88/0xac
        really_probe+0x1c4/0x4e4
        driver_probe_device+0x78/0x1d8
        device_driver_attach+0x58/0x60
        __driver_attach+0xfc/0x160
        bus_for_each_dev+0x6c/0xb8
        bus_add_driver+0x170/0x20c
        driver_register+0x78/0x10c
        do_one_initcall+0x88/0x438
        kernel_init_freeable+0x18c/0x1dc
        kernel_init+0x8/0x118
        ret_from_fork+0x14/0x38
        0x0

other info that might help us debug this:

Chain exists of:
   regulator_list_mutex --> regulator_ww_class_acquire --> 
regulator_ww_class_mutex

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(regulator_ww_class_mutex);
                                lock(regulator_ww_class_acquire);
                                lock(regulator_ww_class_mutex);
   lock(regulator_list_mutex);

  *** DEADLOCK ***

5 locks held by swapper/0/1:
  #0: dfb6e4c8 (&dev->mutex){....}-{3:3}, at: device_driver_attach+0x18/0x60
  #1: c1fedcd8 (&dev->mutex){....}-{3:3}, at: __device_attach+0x34/0x180
  #2: df53a4e8 (&dev->mutex){....}-{3:3}, at: __device_attach+0x34/0x180
  #3: df5224d8 (&dev->mutex){....}-{3:3}, at: __device_attach+0x34/0x180
  #4: df7190c0 (regulator_ww_class_mutex){+.+.}-{3:3}, at: 
regulator_resolve_supply+0x44/0x318

stack backtrace:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.11.0-rc1-00008-geaa7995c529b 
#10095
Hardware name: Samsung Exynos (Flattened Device Tree)
[<c01116e8>] (unwind_backtrace) from [<c010cf58>] (show_stack+0x10/0x14)
[<c010cf58>] (show_stack) from [<c0b38ffc>] (dump_stack+0xa4/0xc4)
[<c0b38ffc>] (dump_stack) from [<c0193458>] (check_noncircular+0x14c/0x164)
[<c0193458>] (check_noncircular) from [<c0196b90>] 
(__lock_acquire+0x1830/0x31cc)
[<c0196b90>] (__lock_acquire) from [<c01991e4>] (lock_acquire+0x2e4/0x5dc)
[<c01991e4>] (lock_acquire) from [<c0b4043c>] (__mutex_lock+0xa4/0xb60)
[<c0b4043c>] (__mutex_lock) from [<c0b40f14>] (mutex_lock_nested+0x1c/0x24)
[<c0b40f14>] (mutex_lock_nested) from [<c05ccd94>] 
(regulator_lock_dependent+0x4c/0x2b0)
[<c05ccd94>] (regulator_lock_dependent) from [<c05d220c>] 
(regulator_enable+0x30/0xe4)
[<c05d220c>] (regulator_enable) from [<c05d248c>] 
(regulator_resolve_supply+0x1cc/0x318)
[<c05d248c>] (regulator_resolve_supply) from [<c05d2974>] 
(regulator_register_resolve_supply+0x14/0x78)
[<c05d2974>] (regulator_register_resolve_supply) from [<c06a3000>] 
(class_for_each_device+0x68/0xe8)
[<c06a3000>] (class_for_each_device) from [<c05d3e20>] 
(regulator_register+0xa2c/0xc9c)
[<c05d3e20>] (regulator_register) from [<c05d5c70>] 
(devm_regulator_register+0x40/0x70)
[<c05d5c70>] (devm_regulator_register) from [<c05dea58>] 
(tps65090_regulator_probe+0x150/0x648)
[<c05dea58>] (tps65090_regulator_probe) from [<c06a3fe8>] 
(platform_probe+0x80/0xc0)
[<c06a3fe8>] (platform_probe) from [<c06a1114>] (really_probe+0x1c4/0x4e4)
[<c06a1114>] (really_probe) from [<c06a14ac>] 
(driver_probe_device+0x78/0x1d8)
[<c06a14ac>] (driver_probe_device) from [<c069f1a4>] 
(bus_for_each_drv+0x78/0xbc)
[<c069f1a4>] (bus_for_each_drv) from [<c06a0eb0>] 
(__device_attach+0xe8/0x180)
[<c06a0eb0>] (__device_attach) from [<c069ff50>] 
(bus_probe_device+0x88/0x90)
[<c069ff50>] (bus_probe_device) from [<c069dbac>] (device_add+0x4c4/0x7e8)
[<c069dbac>] (device_add) from [<c06a3bac>] 
(platform_device_add+0x120/0x25c)
[<c06a3bac>] (platform_device_add) from [<c06d5c7c>] 
(mfd_add_devices+0x580/0x60c)
[<c06d5c7c>] (mfd_add_devices) from [<c06d80e8>] 
(tps65090_i2c_probe+0xb8/0x184)
[<c06d80e8>] (tps65090_i2c_probe) from [<c0822520>] 
(i2c_device_probe+0x234/0x2a4)
[<c0822520>] (i2c_device_probe) from [<c06a1114>] (really_probe+0x1c4/0x4e4)
[<c06a1114>] (really_probe) from [<c06a14ac>] 
(driver_probe_device+0x78/0x1d8)
[<c06a14ac>] (driver_probe_device) from [<c069f1a4>] 
(bus_for_each_drv+0x78/0xbc)
[<c069f1a4>] (bus_for_each_drv) from [<c06a0eb0>] 
(__device_attach+0xe8/0x180)
[<c06a0eb0>] (__device_attach) from [<c069ff50>] 
(bus_probe_device+0x88/0x90)
[<c069ff50>] (bus_probe_device) from [<c069dbac>] (device_add+0x4c4/0x7e8)
[<c069dbac>] (device_add) from [<c0824aec>] 
(i2c_new_client_device+0x15c/0x27c)
[<c0824aec>] (i2c_new_client_device) from [<c08285e0>] 
(of_i2c_register_devices+0x114/0x184)
[<c08285e0>] (of_i2c_register_devices) from [<c08254b8>] 
(i2c_register_adapter+0x1d8/0x6dc)
[<c08254b8>] (i2c_register_adapter) from [<c082dd1c>] 
(ec_i2c_probe+0xc8/0x124)
[<c082dd1c>] (ec_i2c_probe) from [<c06a3fe8>] (platform_probe+0x80/0xc0)
[<c06a3fe8>] (platform_probe) from [<c06a1114>] (really_probe+0x1c4/0x4e4)
[<c06a1114>] (really_probe) from [<c06a14ac>] 
(driver_probe_device+0x78/0x1d8)
[<c06a14ac>] (driver_probe_device) from [<c069f1a4>] 
(bus_for_each_drv+0x78/0xbc)
[<c069f1a4>] (bus_for_each_drv) from [<c06a0eb0>] 
(__device_attach+0xe8/0x180)
[<c06a0eb0>] (__device_attach) from [<c069ff50>] 
(bus_probe_device+0x88/0x90)
[<c069ff50>] (bus_probe_device) from [<c069dbac>] (device_add+0x4c4/0x7e8)
[<c069dbac>] (device_add) from [<c08b140c>] 
(of_platform_device_create_pdata+0x90/0xc8)
[<c08b140c>] (of_platform_device_create_pdata) from [<c08b15f0>] 
(of_platform_bus_create+0x1a0/0x4ec)
[<c08b15f0>] (of_platform_bus_create) from [<c08b1af0>] 
(of_platform_populate+0x88/0x120)
[<c08b1af0>] (of_platform_populate) from [<c08b1bdc>] 
(devm_of_platform_populate+0x40/0x80)
[<c08b1bdc>] (devm_of_platform_populate) from [<c08b72fc>] 
(cros_ec_register+0x174/0x308)
[<c08b72fc>] (cros_ec_register) from [<c08b868c>] 
(cros_ec_spi_probe+0x16c/0x1ec)
[<c08b868c>] (cros_ec_spi_probe) from [<c071b2f4>] (spi_probe+0x88/0xac)
[<c071b2f4>] (spi_probe) from [<c06a1114>] (really_probe+0x1c4/0x4e4)
[<c06a1114>] (really_probe) from [<c06a14ac>] 
(driver_probe_device+0x78/0x1d8)
[<c06a14ac>] (driver_probe_device) from [<c06a19c4>] 
(device_driver_attach+0x58/0x60)
[<c06a19c4>] (device_driver_attach) from [<c06a1ac8>] 
(__driver_attach+0xfc/0x160)
[<c06a1ac8>] (__driver_attach) from [<c069f0cc>] 
(bus_for_each_dev+0x6c/0xb8)
[<c069f0cc>] (bus_for_each_dev) from [<c06a0204>] 
(bus_add_driver+0x170/0x20c)
[<c06a0204>] (bus_add_driver) from [<c06a2968>] (driver_register+0x78/0x10c)
[<c06a2968>] (driver_register) from [<c0102428>] 
(do_one_initcall+0x88/0x438)
[<c0102428>] (do_one_initcall) from [<c1101104>] 
(kernel_init_freeable+0x18c/0x1dc)
[<c1101104>] (kernel_init_freeable) from [<c0b3c65c>] 
(kernel_init+0x8/0x118)
[<c0b3c65c>] (kernel_init) from [<c010011c>] (ret_from_fork+0x14/0x38)
Exception stack(0xc1ce3fb0 to 0xc1ce3ff8)
3fa0:                                     00000000 00000000 00000000 
00000000
3fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 
00000000
3fe0: 00000000 00000000 00000000 00000000 00000013 00000000

I didn't analyze it yet if this warning is really an issue or just a 
false positive. If you have any hints or comments let me know.

> ---
>   drivers/regulator/core.c | 39 ++++++++++++++++++++++++++++-----------
>   1 file changed, 28 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
> index fee9241..3ae5ccd 100644
> --- a/drivers/regulator/core.c
> +++ b/drivers/regulator/core.c
> @@ -1813,23 +1813,34 @@ static int regulator_resolve_supply(struct regulator_dev *rdev)
>   {
>   	struct regulator_dev *r;
>   	struct device *dev = rdev->dev.parent;
> -	int ret;
> +	int ret = 0;
>   
>   	/* No supply to resolve? */
>   	if (!rdev->supply_name)
>   		return 0;
>   
> -	/* Supply already resolved? */
> +	/* Supply already resolved? (fast-path without locking contention) */
>   	if (rdev->supply)
>   		return 0;
>   
> +	/*
> +	 * Recheck rdev->supply with rdev->mutex lock held to avoid a race
> +	 * between rdev->supply null check and setting rdev->supply in
> +	 * set_supply() from concurrent tasks.
> +	 */
> +	regulator_lock(rdev);
> +
> +	/* Supply just resolved by a concurrent task? */
> +	if (rdev->supply)
> +		goto out;
> +
>   	r = regulator_dev_lookup(dev, rdev->supply_name);
>   	if (IS_ERR(r)) {
>   		ret = PTR_ERR(r);
>   
>   		/* Did the lookup explicitly defer for us? */
>   		if (ret == -EPROBE_DEFER)
> -			return ret;
> +			goto out;
>   
>   		if (have_full_constraints()) {
>   			r = dummy_regulator_rdev;
> @@ -1837,15 +1848,18 @@ static int regulator_resolve_supply(struct regulator_dev *rdev)
>   		} else {
>   			dev_err(dev, "Failed to resolve %s-supply for %s\n",
>   				rdev->supply_name, rdev->desc->name);
> -			return -EPROBE_DEFER;
> +			ret = -EPROBE_DEFER;
> +			goto out;
>   		}
>   	}
>   
>   	if (r == rdev) {
>   		dev_err(dev, "Supply for %s (%s) resolved to itself\n",
>   			rdev->desc->name, rdev->supply_name);
> -		if (!have_full_constraints())
> -			return -EINVAL;
> +		if (!have_full_constraints()) {
> +			ret = -EINVAL;
> +			goto out;
> +		}
>   		r = dummy_regulator_rdev;
>   		get_device(&r->dev);
>   	}
> @@ -1859,7 +1873,8 @@ static int regulator_resolve_supply(struct regulator_dev *rdev)
>   	if (r->dev.parent && r->dev.parent != rdev->dev.parent) {
>   		if (!device_is_bound(r->dev.parent)) {
>   			put_device(&r->dev);
> -			return -EPROBE_DEFER;
> +			ret = -EPROBE_DEFER;
> +			goto out;
>   		}
>   	}
>   
> @@ -1867,13 +1882,13 @@ static int regulator_resolve_supply(struct regulator_dev *rdev)
>   	ret = regulator_resolve_supply(r);
>   	if (ret < 0) {
>   		put_device(&r->dev);
> -		return ret;
> +		goto out;
>   	}
>   
>   	ret = set_supply(rdev, r);
>   	if (ret < 0) {
>   		put_device(&r->dev);
> -		return ret;
> +		goto out;
>   	}
>   
>   	/*
> @@ -1886,11 +1901,13 @@ static int regulator_resolve_supply(struct regulator_dev *rdev)
>   		if (ret < 0) {
>   			_regulator_put(rdev->supply);
>   			rdev->supply = NULL;
> -			return ret;
> +			goto out;
>   		}
>   	}
>   
> -	return 0;
> +out:
> +	regulator_unlock(rdev);
> +	return ret;
>   }
>   
>   /* Internal regulator request function */

Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [Linux for Sparc]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux