Dear All, On 2019-04-18 21:58, Guenter Roeck wrote: > Use devm_thermal_of_cooling_device_register() to register the cooling > device. Also use devm_add_action_or_reset() to stop the fan on device > removal, and to disable the pwm. Introduce a local 'dev' variable in > the probe function to make the code easier to read. > > As a side effect, this fixes a bug seen if pwm_fan_of_get_cooling_data() > returned an error. In that situation, the pwm was not disabled, and > the fan was not stopped. Using devm functions also ensures that the > pwm is disabled and that the fan is stopped only after the hwmon device > has been unregistered. > > Cc: Lukasz Majewski <l.majewski@xxxxxxxxxxx> > Signed-off-by: Guenter Roeck <linux@xxxxxxxxxxxx> I've noticed the following lockdep warning after this commit during CPU hotplug tests on TM2e board (ARM64 Exynos5433). It looks like a false positive, but it would be nice to annotate it somehow in the code to make lockdep happy: --->8--- IRQ 8: no longer affine to CPU5 CPU5: shutdown IRQ 9: no longer affine to CPU6 CPU6: shutdown ====================================================== WARNING: possible circular locking dependency detected 5.2.0-rc1+ #6093 Not tainted ------------------------------------------------------ cpuhp/7/43 is trying to acquire lock: 00000000d1a60be3 (thermal_list_lock){+.+.}, at: thermal_cooling_device_unregister+0x34/0x1c0 but task is already holding lock: 00000000a6a56c92 (&policy->rwsem){++++}, at: cpufreq_offline+0x68/0x228 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&policy->rwsem){++++}: down_write+0x48/0x98 cpufreq_cpu_acquire+0x20/0x58 cpufreq_update_policy+0x28/0xc0 cpufreq_set_cur_state+0x44/0x70 thermal_cdev_update+0x7c/0x240 step_wise_throttle+0x4c/0x88 handle_thermal_trip+0xc0/0x348 thermal_zone_device_update.part.7+0x6c/0x250 thermal_zone_device_update+0x28/0x38 exynos_tmu_work+0x28/0x70 process_one_work+0x298/0x6c0 worker_thread+0x48/0x430 kthread+0x100/0x130 ret_from_fork+0x10/0x18 -> #2 (&cdev->lock){+.+.}: __mutex_lock+0x90/0x890 mutex_lock_nested+0x1c/0x28 thermal_zone_bind_cooling_device+0x2cc/0x3e0 of_thermal_bind+0x9c/0xf8 __thermal_cooling_device_register+0x1a4/0x388 thermal_of_cooling_device_register+0xc/0x18 __cpufreq_cooling_register+0x360/0x410 of_cpufreq_cooling_register+0x84/0xf8 cpufreq_online+0x414/0x720 cpufreq_add_dev+0x78/0x88 subsys_interface_register+0xa4/0xf8 cpufreq_register_driver+0x140/0x1e0 dt_cpufreq_probe+0xb0/0x130 platform_drv_probe+0x50/0xa8 really_probe+0x1b0/0x2a0 driver_probe_device+0x58/0x100 __device_attach_driver+0x90/0xc0 bus_for_each_drv+0x70/0xc8 __device_attach+0xdc/0x140 device_initial_probe+0x10/0x18 bus_probe_device+0x94/0xa0 device_add+0x39c/0x5c8 platform_device_add+0x110/0x248 platform_device_register_full+0x134/0x178 cpufreq_dt_platdev_init+0x114/0x14c do_one_initcall+0x84/0x430 kernel_init_freeable+0x440/0x534 kernel_init+0x10/0x108 ret_from_fork+0x10/0x18 -> #1 (&tz->lock){+.+.}: __mutex_lock+0x90/0x890 mutex_lock_nested+0x1c/0x28 thermal_zone_bind_cooling_device+0x2b8/0x3e0 of_thermal_bind+0x9c/0xf8 __thermal_cooling_device_register+0x1a4/0x388 thermal_of_cooling_device_register+0xc/0x18 __cpufreq_cooling_register+0x360/0x410 of_cpufreq_cooling_register+0x84/0xf8 cpufreq_online+0x414/0x720 cpufreq_add_dev+0x78/0x88 subsys_interface_register+0xa4/0xf8 cpufreq_register_driver+0x140/0x1e0 dt_cpufreq_probe+0xb0/0x130 platform_drv_probe+0x50/0xa8 really_probe+0x1b0/0x2a0 driver_probe_device+0x58/0x100 __device_attach_driver+0x90/0xc0 bus_for_each_drv+0x70/0xc8 __device_attach+0xdc/0x140 device_initial_probe+0x10/0x18 bus_probe_device+0x94/0xa0 device_add+0x39c/0x5c8 platform_device_add+0x110/0x248 platform_device_register_full+0x134/0x178 cpufreq_dt_platdev_init+0x114/0x14c do_one_initcall+0x84/0x430 kernel_init_freeable+0x440/0x534 kernel_init+0x10/0x108 ret_from_fork+0x10/0x18 -> #0 (thermal_list_lock){+.+.}: lock_acquire+0xdc/0x260 __mutex_lock+0x90/0x890 mutex_lock_nested+0x1c/0x28 thermal_cooling_device_unregister+0x34/0x1c0 cpufreq_cooling_unregister+0x78/0xd0 cpufreq_offline+0x200/0x228 cpuhp_cpufreq_offline+0xc/0x18 cpuhp_invoke_callback+0xd0/0xcb0 cpuhp_thread_fun+0x1e8/0x258 smpboot_thread_fn+0x1b4/0x2d0 kthread+0x100/0x130 ret_from_fork+0x10/0x18 other info that might help us debug this: Chain exists of: thermal_list_lock --> &cdev->lock --> &policy->rwsem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&policy->rwsem); lock(&cdev->lock); lock(&policy->rwsem); lock(thermal_list_lock); *** DEADLOCK *** 3 locks held by cpuhp/7/43: #0: 00000000ae30cc2b (cpu_hotplug_lock.rw_sem){++++}, at: cpuhp_thread_fun+0x34/0x258 #1: 00000000a0e2460a (cpuhp_state-down){+.+.}, at: cpuhp_thread_fun+0x178/0x258 #2: 00000000a6a56c92 (&policy->rwsem){++++}, at: cpufreq_offline+0x68/0x228 stack backtrace: CPU: 7 PID: 43 Comm: cpuhp/7 Not tainted 5.2.0-rc1+ #6093 Hardware name: Samsung TM2E board (DT) Call trace: dump_backtrace+0x0/0x158 show_stack+0x14/0x20 dump_stack+0xc8/0x114 print_circular_bug+0x1cc/0x2d8 __lock_acquire+0x18f4/0x20f8 lock_acquire+0xdc/0x260 __mutex_lock+0x90/0x890 mutex_lock_nested+0x1c/0x28 thermal_cooling_device_unregister+0x34/0x1c0 cpufreq_cooling_unregister+0x78/0xd0 cpufreq_offline+0x200/0x228 cpuhp_cpufreq_offline+0xc/0x18 cpuhp_invoke_callback+0xd0/0xcb0 cpuhp_thread_fun+0x1e8/0x258 smpboot_thread_fn+0x1b4/0x2d0 kthread+0x100/0x130 ret_from_fork+0x10/0x18 IRQ 10: no longer affine to CPU7 --->8--- > --- > drivers/hwmon/pwm-fan.c | 73 ++++++++++++++++++++----------------------------- > 1 file changed, 29 insertions(+), 44 deletions(-) > > diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c > index 167221c7628a..0243ba70107e 100644 > --- a/drivers/hwmon/pwm-fan.c > +++ b/drivers/hwmon/pwm-fan.c > @@ -207,33 +207,44 @@ static int pwm_fan_of_get_cooling_data(struct device *dev, > return 0; > } > > +static void pwm_fan_regulator_disable(void *data) > +{ > + regulator_disable(data); > +} > + > +static void pwm_fan_pwm_disable(void *data) > +{ > + pwm_disable(data); > +} > + > static int pwm_fan_probe(struct platform_device *pdev) > { > struct thermal_cooling_device *cdev; > + struct device *dev = &pdev->dev; > struct pwm_fan_ctx *ctx; > struct device *hwmon; > int ret; > struct pwm_state state = { }; > > - ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL); > + ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); > if (!ctx) > return -ENOMEM; > > mutex_init(&ctx->lock); > > - ctx->pwm = devm_of_pwm_get(&pdev->dev, pdev->dev.of_node, NULL); > + ctx->pwm = devm_of_pwm_get(dev, dev->of_node, NULL); > if (IS_ERR(ctx->pwm)) { > ret = PTR_ERR(ctx->pwm); > > if (ret != -EPROBE_DEFER) > - dev_err(&pdev->dev, "Could not get PWM: %d\n", ret); > + dev_err(dev, "Could not get PWM: %d\n", ret); > > return ret; > } > > platform_set_drvdata(pdev, ctx); > > - ctx->reg_en = devm_regulator_get_optional(&pdev->dev, "fan"); > + ctx->reg_en = devm_regulator_get_optional(dev, "fan"); > if (IS_ERR(ctx->reg_en)) { > if (PTR_ERR(ctx->reg_en) != -ENODEV) > return PTR_ERR(ctx->reg_en); > @@ -242,10 +253,11 @@ static int pwm_fan_probe(struct platform_device *pdev) > } else { > ret = regulator_enable(ctx->reg_en); > if (ret) { > - dev_err(&pdev->dev, > - "Failed to enable fan supply: %d\n", ret); > + dev_err(dev, "Failed to enable fan supply: %d\n", ret); > return ret; > } > + devm_add_action_or_reset(dev, pwm_fan_regulator_disable, > + ctx->reg_en); > } > > ctx->pwm_value = MAX_PWM; > @@ -257,62 +269,36 @@ static int pwm_fan_probe(struct platform_device *pdev) > > ret = pwm_apply_state(ctx->pwm, &state); > if (ret) { > - dev_err(&pdev->dev, "Failed to configure PWM\n"); > - goto err_reg_disable; > + dev_err(dev, "Failed to configure PWM\n"); > + return ret; > } > + devm_add_action_or_reset(dev, pwm_fan_pwm_disable, ctx->pwm); > > - hwmon = devm_hwmon_device_register_with_groups(&pdev->dev, "pwmfan", > + hwmon = devm_hwmon_device_register_with_groups(dev, "pwmfan", > ctx, pwm_fan_groups); > if (IS_ERR(hwmon)) { > - dev_err(&pdev->dev, "Failed to register hwmon device\n"); > - ret = PTR_ERR(hwmon); > - goto err_pwm_disable; > + dev_err(dev, "Failed to register hwmon device\n"); > + return PTR_ERR(hwmon); > } > > - ret = pwm_fan_of_get_cooling_data(&pdev->dev, ctx); > + ret = pwm_fan_of_get_cooling_data(dev, ctx); > if (ret) > return ret; > > ctx->pwm_fan_state = ctx->pwm_fan_max_state; > if (IS_ENABLED(CONFIG_THERMAL)) { > - cdev = thermal_of_cooling_device_register(pdev->dev.of_node, > - "pwm-fan", ctx, > - &pwm_fan_cooling_ops); > + cdev = devm_thermal_of_cooling_device_register(dev, > + dev->of_node, "pwm-fan", ctx, &pwm_fan_cooling_ops); > if (IS_ERR(cdev)) { > - dev_err(&pdev->dev, > + dev_err(dev, > "Failed to register pwm-fan as cooling device"); > - ret = PTR_ERR(cdev); > - goto err_pwm_disable; > + return PTR_ERR(cdev); > } > ctx->cdev = cdev; > thermal_cdev_update(cdev); > } > > return 0; > - > -err_pwm_disable: > - state.enabled = false; > - pwm_apply_state(ctx->pwm, &state); > - > -err_reg_disable: > - if (ctx->reg_en) > - regulator_disable(ctx->reg_en); > - > - return ret; > -} > - > -static int pwm_fan_remove(struct platform_device *pdev) > -{ > - struct pwm_fan_ctx *ctx = platform_get_drvdata(pdev); > - > - thermal_cooling_device_unregister(ctx->cdev); > - if (ctx->pwm_value) > - pwm_disable(ctx->pwm); > - > - if (ctx->reg_en) > - regulator_disable(ctx->reg_en); > - > - return 0; > } > > #ifdef CONFIG_PM_SLEEP > @@ -380,7 +366,6 @@ MODULE_DEVICE_TABLE(of, of_pwm_fan_match); > > static struct platform_driver pwm_fan_driver = { > .probe = pwm_fan_probe, > - .remove = pwm_fan_remove, > .driver = { > .name = "pwm-fan", > .pm = &pwm_fan_pm, Best regards -- Marek Szyprowski, PhD Samsung R&D Institute Poland