Hi Song, Please check the attached and see if you can reproduce this. Thanks, Srinivas On Tue, 2022-01-11 at 18:31 +0800, Liwei Song wrote: > exist the below call sequences may cause deadlock: > > isst_if_probe() > --> isst_if_cdev_register() > --> mutex_lock(&punit_misc_dev_lock) > --> misc_register() > --> mutex_lock(&misc_mtx) > > misc_open() > --> mutex_lock(&misc_mtx) > --> isst_if_open() > --> mutex_lock(&punit_misc_dev_lock) > > to fix this do not hold punit_misc_dev_lock when call misc_register > since it has misc_mtx lock for sync. > > [ 256.104522] ====================================================== > [ 256.113783] WARNING: possible circular locking dependency detected > [ 256.120093] 5.16.0-rc6-yocto-standard+ #99 Not tainted > [ 256.125362] ------------------------------------------------------ > [ 256.131673] intel-speed-sel/844 is trying to acquire lock: > [ 256.137290] ffffffffc036f0d0 (punit_misc_dev_lock){+.+.}-{3:3}, > at: isst_if_open+0x18/0x90 [isst_if_common] > [ 256.147171] > [ 256.147171] but task is already holding lock: > [ 256.153135] ffffffff8ee7cb50 (misc_mtx){+.+.}-{3:3}, at: > misc_open+0x2a/0x170 > [ 256.160407] > [ 256.160407] which lock already depends on the new lock. 
> [ 256.160407] > [ 256.168712] > [ 256.168712] the existing dependency chain (in reverse order) is: > [ 256.176327] > [ 256.176327] -> #1 (misc_mtx){+.+.}-{3:3}: > [ 256.181946] lock_acquire+0x1e6/0x330 > [ 256.186265] __mutex_lock+0x9b/0x9b0 > [ 256.190497] mutex_lock_nested+0x1b/0x20 > [ 256.195075] misc_register+0x32/0x1a0 > [ 256.199390] isst_if_cdev_register+0x65/0x180 > [isst_if_common] > [ 256.205878] isst_if_probe+0x144/0x16e [isst_if_mmio] > [ 256.209991] hrtimer: interrupt took 10370 ns > [ 256.211582] local_pci_probe+0x47/0xa0 > [ 256.220384] work_for_cpu_fn+0x17/0x30 > [ 256.224790] process_one_work+0x26a/0x650 > [ 256.229456] worker_thread+0x1dd/0x3b0 > [ 256.233861] kthread+0x191/0x1c0 > [ 256.237745] ret_from_fork+0x1f/0x30 > [ 256.241976] > [ 256.241976] -> #0 (punit_misc_dev_lock){+.+.}-{3:3}: > [ 256.248552] validate_chain+0xbc6/0x1750 > [ 256.253131] __lock_acquire+0x88c/0xc10 > [ 256.257618] lock_acquire+0x1e6/0x330 > [ 256.261933] __mutex_lock+0x9b/0x9b0 > [ 256.266165] mutex_lock_nested+0x1b/0x20 > [ 256.270739] isst_if_open+0x18/0x90 [isst_if_common] > [ 256.276356] misc_open+0x100/0x170 > [ 256.280409] chrdev_open+0xa5/0x1e0 > [ 256.284550] do_dentry_open+0x23d/0x3c0 > [ 256.289039] vfs_open+0x2f/0x40 > [ 256.292836] path_openat+0x87a/0x940 > [ 256.297064] do_filp_open+0xc5/0x140 > [ 256.301295] do_sys_openat2+0x23d/0x320 > [ 256.305782] do_sys_open+0x59/0x80 > [ 256.309836] __x64_sys_openat+0x20/0x30 > [ 256.314324] do_syscall_64+0x3f/0x90 > [ 256.318552] entry_SYSCALL_64_after_hwframe+0x44/0xae > [ 256.324259] > [ 256.324259] other info that might help us debug this: > [ 256.324259] > [ 256.332394] Possible unsafe locking scenario: > [ 256.332394] > [ 256.338444] CPU0 CPU1 > [ 256.343105] ---- ---- > [ 256.347768] lock(misc_mtx); > [ 256.350870] lock(punit_misc_dev_loc > k); > [ 256.357441] lock(misc_mtx); > [ 256.363058] lock(punit_misc_dev_lock); > [ 256.367110] > [ 256.367110] *** DEADLOCK *** > [ 256.367110] > [ 256.373162] 1 lock 
held by intel-speed-sel/844: > [ 256.377824] #0: ffffffff8ee7cb50 (misc_mtx){+.+.}-{3:3}, at: > misc_open+0x2a/0x170 > [ 256.385531] > [ 256.385531] stack backtrace: > [ 256.390021] CPU: 12 PID: 844 Comm: intel-speed-sel Not tainted > 5.16.0-rc6-yocto-standard+ #99 > [ 256.398678] Hardware name: ACCTON MOROCITY/MOROCITY, BIOS > IDVLCRB1.86B.0021.D09.2111010103 11/01/2021 > [ 256.408028] Call Trace: > [ 256.410605] <TASK> > [ 256.412837] dump_stack_lvl+0x5b/0x82 > [ 256.416635] dump_stack+0x10/0x12 > [ 256.420085] print_circular_bug.isra.43+0x261/0x2c0 > [ 256.425095] check_noncircular+0x126/0x140 > [ 256.429326] ? __this_cpu_preempt_check+0x13/0x20 > [ 256.434167] validate_chain+0xbc6/0x1750 > [ 256.438223] ? validate_chain+0xbc6/0x1750 > [ 256.442451] ? validate_chain+0x236/0x1750 > [ 256.446687] __lock_acquire+0x88c/0xc10 > [ 256.450658] lock_acquire+0x1e6/0x330 > [ 256.454452] ? isst_if_open+0x18/0x90 [isst_if_common] > [ 256.459726] ? __mutex_lock+0x79/0x9b0 > [ 256.463610] ? __mutex_lock+0x79/0x9b0 > [ 256.467493] ? isst_if_open+0x18/0x90 [isst_if_common] > [ 256.472764] ? isst_if_open+0x18/0x90 [isst_if_common] > [ 256.478038] __mutex_lock+0x9b/0x9b0 > [ 256.481748] ? isst_if_open+0x18/0x90 [isst_if_common] > [ 256.487021] ? __mutex_lock+0x102/0x9b0 > [ 256.490993] ? __this_cpu_preempt_check+0x13/0x20 > [ 256.495837] mutex_lock_nested+0x1b/0x20 > [ 256.499893] ? mutex_lock_nested+0x1b/0x20 > [ 256.504121] isst_if_open+0x18/0x90 [isst_if_common] > [ 256.509222] misc_open+0x100/0x170 > [ 256.512759] chrdev_open+0xa5/0x1e0 > [ 256.516386] ? cdev_put.part.1+0x20/0x20 > [ 256.520441] do_dentry_open+0x23d/0x3c0 > [ 256.524414] vfs_open+0x2f/0x40 > [ 256.527689] path_openat+0x87a/0x940 > [ 256.531399] do_filp_open+0xc5/0x140 > [ 256.535112] ? trace_preempt_on+0x28/0xd0 > [ 256.539255] ? alloc_fd+0x152/0x230 > [ 256.542880] ? preempt_count_sub+0x9b/0x100 > [ 256.547200] ? _raw_spin_unlock+0x2c/0x50 > [ 256.551348] do_sys_openat2+0x23d/0x320 > [ 256.555320] ? 
do_sys_openat2+0x23d/0x320 > [ 256.559467] do_sys_open+0x59/0x80 > [ 256.563003] __x64_sys_openat+0x20/0x30 > [ 256.566972] do_syscall_64+0x3f/0x90 > [ 256.570680] entry_SYSCALL_64_after_hwframe+0x44/0xae > [ 256.575866] RIP: 0033:0x7f9be4b97c27 > [ 256.579576] Code: 25 00 00 41 00 3d 00 00 41 00 74 37 64 8b 04 25 > 18 00 00 00 85 c0 75 5b 44 89 e2 48 89 ee bf 9c ff ff ff b8 01 01 00 > 00 0f 05 <48> 3d 00 f0 ff ff 0f 87 85 00 00 00 48 83 c4 68 5d 41 5c > c3 0f 1f > [ 256.598474] RSP: 002b:00007ffd8fc01b70 EFLAGS: 00000246 ORIG_RAX: > 0000000000000101 > [ 256.606177] RAX: ffffffffffffffda RBX: 00005572f20332b0 RCX: > 00007f9be4b97c27 > [ 256.613443] RDX: 0000000000000000 RSI: 00005572f202936a RDI: > 00000000ffffff9c > [ 256.620709] RBP: 00005572f202936a R08: 0000000000000008 R09: > 0000000000000001 > [ 256.627974] R10: 0000000000000000 R11: 0000000000000246 R12: > 0000000000000000 > [ 256.635241] R13: 00005572f20332b0 R14: 0000000000000001 R15: > 0000000000000000 > [ 256.642513] </TASK> > > Signed-off-by: Liwei Song <liwei.song@xxxxxxxxxxxxx> > --- > drivers/platform/x86/intel/speed_select_if/isst_if_common.c | 6 > +++++- > 1 file changed, 5 insertions(+), 1 deletion(-) > > diff --git > a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c > b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c > index c9a85eb2e860..bcbc0d508ec4 100644 > --- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c > +++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c > @@ -693,10 +693,12 @@ int isst_if_cdev_register(int device_type, > struct isst_if_cmd_cb *cb) > if (!misc_usage_count) { > int ret; > > + mutex_unlock(&punit_misc_dev_lock); > misc_device_ret = misc_register(&isst_if_char_driver); > if (misc_device_ret) > - goto unlock_exit; > + return misc_device_ret; > > + mutex_lock(&punit_misc_dev_lock); > ret = isst_if_cpu_info_init(); > if (ret) { > misc_deregister(&isst_if_char_driver); > @@ -731,7 +733,9 @@ void isst_if_cdev_unregister(int 
device_type) > if (device_type == ISST_IF_DEV_MBOX) > isst_delete_hash(); > if (!misc_usage_count && !misc_device_ret) { > + mutex_unlock(&punit_misc_dev_lock); > misc_deregister(&isst_if_char_driver); > + mutex_lock(&punit_misc_dev_lock); > isst_if_cpu_info_exit(); > } > mutex_unlock(&punit_misc_dev_lock);
From e92c9c429dbd259778b94c45c4723ddb0d1670fb Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada <srinivas.pandruvada@xxxxxxxxxxxxxxx> Date: Tue, 11 Jan 2022 12:04:28 -0800 Subject: [PATCH] platform/x86: ISST: Fix possible circular locking dependency detected As reported: [ 256.104522] ====================================================== [ 256.113783] WARNING: possible circular locking dependency detected [ 256.120093] 5.16.0-rc6-yocto-standard+ #99 Not tainted [ 256.125362] ------------------------------------------------------ [ 256.131673] intel-speed-sel/844 is trying to acquire lock: [ 256.137290] ffffffffc036f0d0 (punit_misc_dev_lock){+.+.}-{3:3}, at: isst_if_open+0x18/0x90 [isst_if_common] [ 256.147171] [ 256.147171] but task is already holding lock: [ 256.153135] ffffffff8ee7cb50 (misc_mtx){+.+.}-{3:3}, at: misc_open+0x2a/0x170 [ 256.160407] [ 256.160407] which lock already depends on the new lock. [ 256.160407] [ 256.168712] [ 256.168712] the existing dependency chain (in reverse order) is: [ 256.176327] [ 256.176327] -> #1 (misc_mtx){+.+.}-{3:3}: [ 256.181946] lock_acquire+0x1e6/0x330 [ 256.186265] __mutex_lock+0x9b/0x9b0 [ 256.190497] mutex_lock_nested+0x1b/0x20 [ 256.195075] misc_register+0x32/0x1a0 [ 256.199390] isst_if_cdev_register+0x65/0x180 [isst_if_common] [ 256.205878] isst_if_probe+0x144/0x16e [isst_if_mmio] ... [ 256.241976] [ 256.241976] -> #0 (punit_misc_dev_lock){+.+.}-{3:3}: [ 256.248552] validate_chain+0xbc6/0x1750 [ 256.253131] __lock_acquire+0x88c/0xc10 [ 256.257618] lock_acquire+0x1e6/0x330 [ 256.261933] __mutex_lock+0x9b/0x9b0 [ 256.266165] mutex_lock_nested+0x1b/0x20 [ 256.270739] isst_if_open+0x18/0x90 [isst_if_common] [ 256.276356] misc_open+0x100/0x170 [ 256.280409] chrdev_open+0xa5/0x1e0 ... The call sequence suggests that the misc device /dev file can be opened before registration of the misc device, which is done only once, has completed. 
Here punit_misc_dev_lock was used as a common lock: to protect the registration by ISST HW drivers and the one-time setup, to prevent duplicate registration of the misc device, and to prevent load/unload while the device is open. We can split it into two locks: - One which just prevents duplicate calls to misc_register() and to the one-time setup. It also ensures that neither is attempted again if misc_register() or the required one-time setup has failed. This lock is not shared with any misc device callbacks. - The other lock protects registration, load and unload of HW drivers. Sequence in isst_if_cdev_register(): - Register callbacks under punit_misc_dev_open_lock - Call isst_misc_reg(), which registers the misc device on the first registration under punit_misc_dev_reg_lock, which is not shared with callbacks. Sequence in isst_if_cdev_unregister(): just the opposite of isst_if_cdev_register(). Reported-by: Liwei Song <liwei.song@xxxxxxxxxxxxx> Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@xxxxxxxxxxxxxxx> --- .../intel/speed_select_if/isst_if_common.c | 97 ++++++++++++------- 1 file changed, 63 insertions(+), 34 deletions(-) diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c index c9a85eb2e860..e8424e70d81d 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c @@ -596,7 +596,10 @@ static long isst_if_def_ioctl(struct file *file, unsigned int cmd, return ret; } -static DEFINE_MUTEX(punit_misc_dev_lock); +/* Lock to prevent module registration when already opened by user space */ +static DEFINE_MUTEX(punit_misc_dev_open_lock); +/* Lock to allow one share misc device for all ISST interace */ +static DEFINE_MUTEX(punit_misc_dev_reg_lock); static int misc_usage_count; static int misc_device_ret; static int misc_device_open; @@ -606,7 +609,7 @@ static int isst_if_open(struct inode *inode, struct file *file) int i, ret = 0; /* Fail open, if a module is going away */ - 
mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); for (i = 0; i < ISST_IF_DEV_MAX; ++i) { struct isst_if_cmd_cb *cb = &punit_callbacks[i]; @@ -628,7 +631,7 @@ static int isst_if_open(struct inode *inode, struct file *file) } else { misc_device_open++; } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return ret; } @@ -637,7 +640,7 @@ static int isst_if_relase(struct inode *inode, struct file *f) { int i; - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); misc_device_open--; for (i = 0; i < ISST_IF_DEV_MAX; ++i) { struct isst_if_cmd_cb *cb = &punit_callbacks[i]; @@ -645,7 +648,7 @@ static int isst_if_relase(struct inode *inode, struct file *f) if (cb->registered) module_put(cb->owner); } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return 0; } @@ -662,6 +665,43 @@ static struct miscdevice isst_if_char_driver = { .fops = &isst_if_char_driver_ops, }; +static int isst_misc_reg(void) +{ + mutex_lock(&punit_misc_dev_reg_lock); + if (misc_device_ret) + goto unlock_exit; + + if (!misc_usage_count) { + misc_device_ret = isst_if_cpu_info_init(); + if (misc_device_ret) + goto unlock_exit; + + misc_device_ret = misc_register(&isst_if_char_driver); + if (misc_device_ret) { + isst_if_cpu_info_exit(); + goto unlock_exit; + } + } + misc_usage_count++; + +unlock_exit: + mutex_unlock(&punit_misc_dev_reg_lock); + + return misc_device_ret; +} + +static void isst_misc_unreg(void) +{ + mutex_lock(&punit_misc_dev_reg_lock); + if (misc_usage_count) + misc_usage_count--; + if (!misc_usage_count && !misc_device_ret) { + misc_deregister(&isst_if_char_driver); + isst_if_cpu_info_exit(); + } + mutex_unlock(&punit_misc_dev_reg_lock); +} + /** * isst_if_cdev_register() - Register callback for IOCTL * @device_type: The device type this callback handling. 
@@ -679,38 +719,31 @@ static struct miscdevice isst_if_char_driver = { */ int isst_if_cdev_register(int device_type, struct isst_if_cmd_cb *cb) { - if (misc_device_ret) - return misc_device_ret; + int ret; if (device_type >= ISST_IF_DEV_MAX) return -EINVAL; - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); + /* Device is already open, we don't want to add new callbacks */ if (misc_device_open) { - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return -EAGAIN; } - if (!misc_usage_count) { - int ret; - - misc_device_ret = misc_register(&isst_if_char_driver); - if (misc_device_ret) - goto unlock_exit; - - ret = isst_if_cpu_info_init(); - if (ret) { - misc_deregister(&isst_if_char_driver); - misc_device_ret = ret; - goto unlock_exit; - } - } memcpy(&punit_callbacks[device_type], cb, sizeof(*cb)); punit_callbacks[device_type].registered = 1; - misc_usage_count++; -unlock_exit: - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); - return misc_device_ret; + ret = isst_misc_reg(); + if (ret) { + /* + * No need of mutex as the misc device register failed + * as no one can open device yet. Hence no contention. + */ + punit_callbacks[device_type].registered = 0; + return ret; + } + return 0; } EXPORT_SYMBOL_GPL(isst_if_cdev_register); @@ -725,16 +758,12 @@ EXPORT_SYMBOL_GPL(isst_if_cdev_register); */ void isst_if_cdev_unregister(int device_type) { - mutex_lock(&punit_misc_dev_lock); - misc_usage_count--; + isst_misc_unreg(); + mutex_lock(&punit_misc_dev_open_lock); punit_callbacks[device_type].registered = 0; if (device_type == ISST_IF_DEV_MBOX) isst_delete_hash(); - if (!misc_usage_count && !misc_device_ret) { - misc_deregister(&isst_if_char_driver); - isst_if_cpu_info_exit(); - } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); } EXPORT_SYMBOL_GPL(isst_if_cdev_unregister); -- 2.25.1