Re: [PATCH] platform/x86: ISST: do not hold lock punit_misc_dev_lock when register misc

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Song,

Please check the attached and see if you can reproduce this.

Thanks,
Srinivas

On Tue, 2022-01-11 at 18:31 +0800, Liwei Song wrote:
> The call sequences below may cause a deadlock:
> 
> isst_if_probe()
>    --> isst_if_cdev_register()
>       --> mutex_lock(&punit_misc_dev_lock)
>    --> misc_register()
>       --> mutex_lock(&misc_mtx)
> 
> misc_open()
>    --> mutex_lock(&misc_mtx)
>    --> isst_if_open()
>       --> mutex_lock(&punit_misc_dev_lock)
> 
> To fix this, do not hold punit_misc_dev_lock when calling
> misc_register(), since it already takes misc_mtx for synchronization.
> 
> [  256.104522] ======================================================
> [  256.113783] WARNING: possible circular locking dependency detected
> [  256.120093] 5.16.0-rc6-yocto-standard+ #99 Not tainted
> [  256.125362] ------------------------------------------------------
> [  256.131673] intel-speed-sel/844 is trying to acquire lock:
> [  256.137290] ffffffffc036f0d0 (punit_misc_dev_lock){+.+.}-{3:3},
> at: isst_if_open+0x18/0x90 [isst_if_common]
> [  256.147171]
> [  256.147171] but task is already holding lock:
> [  256.153135] ffffffff8ee7cb50 (misc_mtx){+.+.}-{3:3}, at:
> misc_open+0x2a/0x170
> [  256.160407]
> [  256.160407] which lock already depends on the new lock.
> [  256.160407]
> [  256.168712]
> [  256.168712] the existing dependency chain (in reverse order) is:
> [  256.176327]
> [  256.176327] -> #1 (misc_mtx){+.+.}-{3:3}:
> [  256.181946]        lock_acquire+0x1e6/0x330
> [  256.186265]        __mutex_lock+0x9b/0x9b0
> [  256.190497]        mutex_lock_nested+0x1b/0x20
> [  256.195075]        misc_register+0x32/0x1a0
> [  256.199390]        isst_if_cdev_register+0x65/0x180
> [isst_if_common]
> [  256.205878]        isst_if_probe+0x144/0x16e [isst_if_mmio]
> [  256.209991] hrtimer: interrupt took 10370 ns
> [  256.211582]        local_pci_probe+0x47/0xa0
> [  256.220384]        work_for_cpu_fn+0x17/0x30
> [  256.224790]        process_one_work+0x26a/0x650
> [  256.229456]        worker_thread+0x1dd/0x3b0
> [  256.233861]        kthread+0x191/0x1c0
> [  256.237745]        ret_from_fork+0x1f/0x30
> [  256.241976]
> [  256.241976] -> #0 (punit_misc_dev_lock){+.+.}-{3:3}:
> [  256.248552]        validate_chain+0xbc6/0x1750
> [  256.253131]        __lock_acquire+0x88c/0xc10
> [  256.257618]        lock_acquire+0x1e6/0x330
> [  256.261933]        __mutex_lock+0x9b/0x9b0
> [  256.266165]        mutex_lock_nested+0x1b/0x20
> [  256.270739]        isst_if_open+0x18/0x90 [isst_if_common]
> [  256.276356]        misc_open+0x100/0x170
> [  256.280409]        chrdev_open+0xa5/0x1e0
> [  256.284550]        do_dentry_open+0x23d/0x3c0
> [  256.289039]        vfs_open+0x2f/0x40
> [  256.292836]        path_openat+0x87a/0x940
> [  256.297064]        do_filp_open+0xc5/0x140
> [  256.301295]        do_sys_openat2+0x23d/0x320
> [  256.305782]        do_sys_open+0x59/0x80
> [  256.309836]        __x64_sys_openat+0x20/0x30
> [  256.314324]        do_syscall_64+0x3f/0x90
> [  256.318552]        entry_SYSCALL_64_after_hwframe+0x44/0xae
> [  256.324259]
> [  256.324259] other info that might help us debug this:
> [  256.324259]
> [  256.332394]  Possible unsafe locking scenario:
> [  256.332394]
> [  256.338444]        CPU0                    CPU1
> [  256.343105]        ----                    ----
> [  256.347768]   lock(misc_mtx);
> [  256.350870]                                lock(punit_misc_dev_loc
> k);
> [  256.357441]                                lock(misc_mtx);
> [  256.363058]   lock(punit_misc_dev_lock);
> [  256.367110]
> [  256.367110]  *** DEADLOCK ***
> [  256.367110]
> [  256.373162] 1 lock held by intel-speed-sel/844:
> [  256.377824]  #0: ffffffff8ee7cb50 (misc_mtx){+.+.}-{3:3}, at:
> misc_open+0x2a/0x170
> [  256.385531]
> [  256.385531] stack backtrace:
> [  256.390021] CPU: 12 PID: 844 Comm: intel-speed-sel Not tainted
> 5.16.0-rc6-yocto-standard+ #99
> [  256.398678] Hardware name: ACCTON MOROCITY/MOROCITY, BIOS
> IDVLCRB1.86B.0021.D09.2111010103 11/01/2021
> [  256.408028] Call Trace:
> [  256.410605]  <TASK>
> [  256.412837]  dump_stack_lvl+0x5b/0x82
> [  256.416635]  dump_stack+0x10/0x12
> [  256.420085]  print_circular_bug.isra.43+0x261/0x2c0
> [  256.425095]  check_noncircular+0x126/0x140
> [  256.429326]  ? __this_cpu_preempt_check+0x13/0x20
> [  256.434167]  validate_chain+0xbc6/0x1750
> [  256.438223]  ? validate_chain+0xbc6/0x1750
> [  256.442451]  ? validate_chain+0x236/0x1750
> [  256.446687]  __lock_acquire+0x88c/0xc10
> [  256.450658]  lock_acquire+0x1e6/0x330
> [  256.454452]  ? isst_if_open+0x18/0x90 [isst_if_common]
> [  256.459726]  ? __mutex_lock+0x79/0x9b0
> [  256.463610]  ? __mutex_lock+0x79/0x9b0
> [  256.467493]  ? isst_if_open+0x18/0x90 [isst_if_common]
> [  256.472764]  ? isst_if_open+0x18/0x90 [isst_if_common]
> [  256.478038]  __mutex_lock+0x9b/0x9b0
> [  256.481748]  ? isst_if_open+0x18/0x90 [isst_if_common]
> [  256.487021]  ? __mutex_lock+0x102/0x9b0
> [  256.490993]  ? __this_cpu_preempt_check+0x13/0x20
> [  256.495837]  mutex_lock_nested+0x1b/0x20
> [  256.499893]  ? mutex_lock_nested+0x1b/0x20
> [  256.504121]  isst_if_open+0x18/0x90 [isst_if_common]
> [  256.509222]  misc_open+0x100/0x170
> [  256.512759]  chrdev_open+0xa5/0x1e0
> [  256.516386]  ? cdev_put.part.1+0x20/0x20
> [  256.520441]  do_dentry_open+0x23d/0x3c0
> [  256.524414]  vfs_open+0x2f/0x40
> [  256.527689]  path_openat+0x87a/0x940
> [  256.531399]  do_filp_open+0xc5/0x140
> [  256.535112]  ? trace_preempt_on+0x28/0xd0
> [  256.539255]  ? alloc_fd+0x152/0x230
> [  256.542880]  ? preempt_count_sub+0x9b/0x100
> [  256.547200]  ? _raw_spin_unlock+0x2c/0x50
> [  256.551348]  do_sys_openat2+0x23d/0x320
> [  256.555320]  ? do_sys_openat2+0x23d/0x320
> [  256.559467]  do_sys_open+0x59/0x80
> [  256.563003]  __x64_sys_openat+0x20/0x30
> [  256.566972]  do_syscall_64+0x3f/0x90
> [  256.570680]  entry_SYSCALL_64_after_hwframe+0x44/0xae
> [  256.575866] RIP: 0033:0x7f9be4b97c27
> [  256.579576] Code: 25 00 00 41 00 3d 00 00 41 00 74 37 64 8b 04 25
> 18 00 00 00 85 c0 75 5b 44 89 e2 48 89 ee bf 9c ff ff ff b8 01 01 00
> 00 0f 05 <48> 3d 00 f0 ff ff 0f 87 85 00 00 00 48 83 c4 68 5d 41 5c
> c3 0f 1f
> [  256.598474] RSP: 002b:00007ffd8fc01b70 EFLAGS: 00000246 ORIG_RAX:
> 0000000000000101
> [  256.606177] RAX: ffffffffffffffda RBX: 00005572f20332b0 RCX:
> 00007f9be4b97c27
> [  256.613443] RDX: 0000000000000000 RSI: 00005572f202936a RDI:
> 00000000ffffff9c
> [  256.620709] RBP: 00005572f202936a R08: 0000000000000008 R09:
> 0000000000000001
> [  256.627974] R10: 0000000000000000 R11: 0000000000000246 R12:
> 0000000000000000
> [  256.635241] R13: 00005572f20332b0 R14: 0000000000000001 R15:
> 0000000000000000
> [  256.642513]  </TASK>
> 
> Signed-off-by: Liwei Song <liwei.song@xxxxxxxxxxxxx>
> ---
>  drivers/platform/x86/intel/speed_select_if/isst_if_common.c | 6
> +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git
> a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
> b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
> index c9a85eb2e860..bcbc0d508ec4 100644
> --- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
> +++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
> @@ -693,10 +693,12 @@ int isst_if_cdev_register(int device_type,
> struct isst_if_cmd_cb *cb)
>  	if (!misc_usage_count) {
>  		int ret;
>  
> +		mutex_unlock(&punit_misc_dev_lock);
>  		misc_device_ret = misc_register(&isst_if_char_driver);
>  		if (misc_device_ret)
> -			goto unlock_exit;
> +			return misc_device_ret;
>  
> +		mutex_lock(&punit_misc_dev_lock);
>  		ret = isst_if_cpu_info_init();
>  		if (ret) {
>  			misc_deregister(&isst_if_char_driver);
> @@ -731,7 +733,9 @@ void isst_if_cdev_unregister(int device_type)
>  	if (device_type == ISST_IF_DEV_MBOX)
>  		isst_delete_hash();
>  	if (!misc_usage_count && !misc_device_ret) {
> +		mutex_unlock(&punit_misc_dev_lock);
>  		misc_deregister(&isst_if_char_driver);
> +		mutex_lock(&punit_misc_dev_lock);
>  		isst_if_cpu_info_exit();
>  	}
>  	mutex_unlock(&punit_misc_dev_lock);
From e92c9c429dbd259778b94c45c4723ddb0d1670fb Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@xxxxxxxxxxxxxxx>
Date: Tue, 11 Jan 2022 12:04:28 -0800
Subject: [PATCH] platform/x86: ISST: Fix possible circular locking dependency
 detected

As reported:

[  256.104522] ======================================================
[  256.113783] WARNING: possible circular locking dependency detected
[  256.120093] 5.16.0-rc6-yocto-standard+ #99 Not tainted
[  256.125362] ------------------------------------------------------
[  256.131673] intel-speed-sel/844 is trying to acquire lock:
[  256.137290] ffffffffc036f0d0 (punit_misc_dev_lock){+.+.}-{3:3}, at: isst_if_open+0x18/0x90 [isst_if_common]
[  256.147171]
[  256.147171] but task is already holding lock:
[  256.153135] ffffffff8ee7cb50 (misc_mtx){+.+.}-{3:3}, at: misc_open+0x2a/0x170
[  256.160407]
[  256.160407] which lock already depends on the new lock.
[  256.160407]
[  256.168712]
[  256.168712] the existing dependency chain (in reverse order) is:
[  256.176327]
[  256.176327] -> #1 (misc_mtx){+.+.}-{3:3}:
[  256.181946]        lock_acquire+0x1e6/0x330
[  256.186265]        __mutex_lock+0x9b/0x9b0
[  256.190497]        mutex_lock_nested+0x1b/0x20
[  256.195075]        misc_register+0x32/0x1a0
[  256.199390]        isst_if_cdev_register+0x65/0x180 [isst_if_common]
[  256.205878]        isst_if_probe+0x144/0x16e [isst_if_mmio]
...
[  256.241976]
[  256.241976] -> #0 (punit_misc_dev_lock){+.+.}-{3:3}:
[  256.248552]        validate_chain+0xbc6/0x1750
[  256.253131]        __lock_acquire+0x88c/0xc10
[  256.257618]        lock_acquire+0x1e6/0x330
[  256.261933]        __mutex_lock+0x9b/0x9b0
[  256.266165]        mutex_lock_nested+0x1b/0x20
[  256.270739]        isst_if_open+0x18/0x90 [isst_if_common]
[  256.276356]        misc_open+0x100/0x170
[  256.280409]        chrdev_open+0xa5/0x1e0
...

The call sequence suggests that the misc_device /dev file can be opened
before registration of the misc device, which is done only once, has completed.

Here punit_misc_dev_lock was used as a common lock to protect the
registration by ISST HW drivers and the one-time setup, to prevent duplicate
registration of the misc device, and to prevent load/unload while the device is open.

We can split this into two locks:
- One which just prevents duplicate calls to misc_register() and the one-time
setup. It also ensures registration is never attempted again if misc_register()
or the required one-time setup has failed. This lock is not shared with
any misc device callbacks.

- The other lock protects registration, load and unload of HW drivers.

Sequence in isst_if_cdev_register():
- Register callbacks under punit_misc_dev_open_lock.
- Call isst_misc_reg(), which registers the misc_device on the first
registration. This is done under punit_misc_dev_reg_lock, which is not
shared with callbacks.

Sequence in isst_if_cdev_unregister():
Just the opposite of isst_if_cdev_register().

Reported-by: Liwei Song <liwei.song@xxxxxxxxxxxxx>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@xxxxxxxxxxxxxxx>
---
 .../intel/speed_select_if/isst_if_common.c    | 97 ++++++++++++-------
 1 file changed, 63 insertions(+), 34 deletions(-)

diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
index c9a85eb2e860..e8424e70d81d 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
@@ -596,7 +596,10 @@ static long isst_if_def_ioctl(struct file *file, unsigned int cmd,
 	return ret;
 }
 
-static DEFINE_MUTEX(punit_misc_dev_lock);
+/* Lock to prevent module registration when already opened by user space */
+static DEFINE_MUTEX(punit_misc_dev_open_lock);
+/* Lock to allow one shared misc device for all ISST interfaces */
+static DEFINE_MUTEX(punit_misc_dev_reg_lock);
 static int misc_usage_count;
 static int misc_device_ret;
 static int misc_device_open;
@@ -606,7 +609,7 @@ static int isst_if_open(struct inode *inode, struct file *file)
 	int i, ret = 0;
 
 	/* Fail open, if a module is going away */
-	mutex_lock(&punit_misc_dev_lock);
+	mutex_lock(&punit_misc_dev_open_lock);
 	for (i = 0; i < ISST_IF_DEV_MAX; ++i) {
 		struct isst_if_cmd_cb *cb = &punit_callbacks[i];
 
@@ -628,7 +631,7 @@ static int isst_if_open(struct inode *inode, struct file *file)
 	} else {
 		misc_device_open++;
 	}
-	mutex_unlock(&punit_misc_dev_lock);
+	mutex_unlock(&punit_misc_dev_open_lock);
 
 	return ret;
 }
@@ -637,7 +640,7 @@ static int isst_if_relase(struct inode *inode, struct file *f)
 {
 	int i;
 
-	mutex_lock(&punit_misc_dev_lock);
+	mutex_lock(&punit_misc_dev_open_lock);
 	misc_device_open--;
 	for (i = 0; i < ISST_IF_DEV_MAX; ++i) {
 		struct isst_if_cmd_cb *cb = &punit_callbacks[i];
@@ -645,7 +648,7 @@ static int isst_if_relase(struct inode *inode, struct file *f)
 		if (cb->registered)
 			module_put(cb->owner);
 	}
-	mutex_unlock(&punit_misc_dev_lock);
+	mutex_unlock(&punit_misc_dev_open_lock);
 
 	return 0;
 }
@@ -662,6 +665,43 @@ static struct miscdevice isst_if_char_driver = {
 	.fops		= &isst_if_char_driver_ops,
 };
 
+static int isst_misc_reg(void)
+{
+	mutex_lock(&punit_misc_dev_reg_lock);
+	if (misc_device_ret)
+		goto unlock_exit;
+
+	if (!misc_usage_count) {
+		misc_device_ret = isst_if_cpu_info_init();
+		if (misc_device_ret)
+			goto unlock_exit;
+
+		misc_device_ret = misc_register(&isst_if_char_driver);
+		if (misc_device_ret) {
+			isst_if_cpu_info_exit();
+			goto unlock_exit;
+		}
+	}
+	misc_usage_count++;
+
+unlock_exit:
+	mutex_unlock(&punit_misc_dev_reg_lock);
+
+	return misc_device_ret;
+}
+
+static void isst_misc_unreg(void)
+{
+	mutex_lock(&punit_misc_dev_reg_lock);
+	if (misc_usage_count)
+		misc_usage_count--;
+	if (!misc_usage_count && !misc_device_ret) {
+		misc_deregister(&isst_if_char_driver);
+		isst_if_cpu_info_exit();
+	}
+	mutex_unlock(&punit_misc_dev_reg_lock);
+}
+
 /**
  * isst_if_cdev_register() - Register callback for IOCTL
  * @device_type: The device type this callback handling.
@@ -679,38 +719,31 @@ static struct miscdevice isst_if_char_driver = {
  */
 int isst_if_cdev_register(int device_type, struct isst_if_cmd_cb *cb)
 {
-	if (misc_device_ret)
-		return misc_device_ret;
+	int ret;
 
 	if (device_type >= ISST_IF_DEV_MAX)
 		return -EINVAL;
 
-	mutex_lock(&punit_misc_dev_lock);
+	mutex_lock(&punit_misc_dev_open_lock);
+	/* Device is already open, we don't want to add new callbacks */
 	if (misc_device_open) {
-		mutex_unlock(&punit_misc_dev_lock);
+		mutex_unlock(&punit_misc_dev_open_lock);
 		return -EAGAIN;
 	}
-	if (!misc_usage_count) {
-		int ret;
-
-		misc_device_ret = misc_register(&isst_if_char_driver);
-		if (misc_device_ret)
-			goto unlock_exit;
-
-		ret = isst_if_cpu_info_init();
-		if (ret) {
-			misc_deregister(&isst_if_char_driver);
-			misc_device_ret = ret;
-			goto unlock_exit;
-		}
-	}
 	memcpy(&punit_callbacks[device_type], cb, sizeof(*cb));
 	punit_callbacks[device_type].registered = 1;
-	misc_usage_count++;
-unlock_exit:
-	mutex_unlock(&punit_misc_dev_lock);
+	mutex_unlock(&punit_misc_dev_open_lock);
 
-	return misc_device_ret;
+	ret = isst_misc_reg();
+	if (ret) {
+		/*
+		 * No mutex is needed here: misc device registration failed,
+		 * so no one can open the device yet. Hence no contention.
+		 */
+		punit_callbacks[device_type].registered = 0;
+		return ret;
+	}
+	return 0;
 }
 EXPORT_SYMBOL_GPL(isst_if_cdev_register);
 
@@ -725,16 +758,12 @@ EXPORT_SYMBOL_GPL(isst_if_cdev_register);
  */
 void isst_if_cdev_unregister(int device_type)
 {
-	mutex_lock(&punit_misc_dev_lock);
-	misc_usage_count--;
+	isst_misc_unreg();
+	mutex_lock(&punit_misc_dev_open_lock);
 	punit_callbacks[device_type].registered = 0;
 	if (device_type == ISST_IF_DEV_MBOX)
 		isst_delete_hash();
-	if (!misc_usage_count && !misc_device_ret) {
-		misc_deregister(&isst_if_char_driver);
-		isst_if_cpu_info_exit();
-	}
-	mutex_unlock(&punit_misc_dev_lock);
+	mutex_unlock(&punit_misc_dev_open_lock);
 }
 EXPORT_SYMBOL_GPL(isst_if_cdev_unregister);
 
-- 
2.25.1


[Index of Archives]     [Linux Kernel Development]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux