Hi Pauli, On Tue, Dec 3, 2024 at 5:48 PM Pauli Virtanen <pav@xxxxxx> wrote: > > Hi, > > ti, 2024-12-03 kello 16:14 -0500, Luiz Augusto von Dentz kirjoitti: > > From: Luiz Augusto von Dentz <luiz.von.dentz@xxxxxxxxx> > > > > This reworks hci_cb_list to not use mutex hci_cb_list_lock to avoid bugs > > like the bellow: > > > > BUG: sleeping function called from invalid context at kernel/locking/mutex.c:585 > > in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 5070, name: kworker/u9:2 > > preempt_count: 0, expected: 0 > > RCU nest depth: 1, expected: 0 > > 4 locks held by kworker/u9:2/5070: > > #0: ffff888015be3948 ((wq_completion)hci0#2){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3229 [inline] > > #0: ffff888015be3948 ((wq_completion)hci0#2){+.+.}-{0:0}, at: process_scheduled_works+0x8e0/0x1770 kernel/workqueue.c:3335 > > #1: ffffc90003b6fd00 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3230 [inline] > > #1: ffffc90003b6fd00 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0}, at: process_scheduled_works+0x91b/0x1770 kernel/workqueue.c:3335 > > #2: ffff8880665d0078 (&hdev->lock){+.+.}-{3:3}, at: hci_le_create_big_complete_evt+0xcf/0xae0 net/bluetooth/hci_event.c:6914 > > #3: ffffffff8e132020 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:298 [inline] > > #3: ffffffff8e132020 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:750 [inline] > > #3: ffffffff8e132020 (rcu_read_lock){....}-{1:2}, at: hci_le_create_big_complete_evt+0xdb/0xae0 net/bluetooth/hci_event.c:6915 > > CPU: 0 PID: 5070 Comm: kworker/u9:2 Not tainted 6.8.0-syzkaller-08073-g480e035fc4c7 #0 > > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 > > Workqueue: hci0 hci_rx_work > > Call Trace: > > <TASK> > > __dump_stack lib/dump_stack.c:88 [inline] > > dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 > > __might_resched+0x5d4/0x780 kernel/sched/core.c:10187 > > 
__mutex_lock_common kernel/locking/mutex.c:585 [inline] > > __mutex_lock+0xc1/0xd70 kernel/locking/mutex.c:752 > > hci_connect_cfm include/net/bluetooth/hci_core.h:2004 [inline] > > hci_le_create_big_complete_evt+0x3d9/0xae0 net/bluetooth/hci_event.c:6939 > > hci_event_func net/bluetooth/hci_event.c:7514 [inline] > > hci_event_packet+0xa53/0x1540 net/bluetooth/hci_event.c:7569 > > hci_rx_work+0x3e8/0xca0 net/bluetooth/hci_core.c:4171 > > process_one_work kernel/workqueue.c:3254 [inline] > > process_scheduled_works+0xa00/0x1770 kernel/workqueue.c:3335 > > worker_thread+0x86d/0xd70 kernel/workqueue.c:3416 > > kthread+0x2f0/0x390 kernel/kthread.c:388 > > ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 > > ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:243 > > </TASK> > > > > Reported-by: syzbot+2fb0835e0c9cefc34614@xxxxxxxxxxxxxxxxxxxxxxxxx > > Closes: https://syzkaller.appspot.com/bug?extid=2fb0835e0c9cefc34614 > > Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@xxxxxxxxx> > > --- > > include/net/bluetooth/hci_core.h | 89 ++++++++++++++++++++++---------- > > net/bluetooth/hci_core.c | 9 ++-- > > 2 files changed, 65 insertions(+), 33 deletions(-) > > > > diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h > > index ea798f07c5a2..95f11f04e24a 100644 > > --- a/include/net/bluetooth/hci_core.h > > +++ b/include/net/bluetooth/hci_core.h > > @@ -804,7 +804,6 @@ struct hci_conn_params { > > extern struct list_head hci_dev_list; > > extern struct list_head hci_cb_list; > > extern rwlock_t hci_dev_list_lock; > > -extern struct mutex hci_cb_list_lock; > > > > #define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags) > > #define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags) > > @@ -2029,12 +2028,18 @@ static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) > > { > > struct hci_cb *cb; > > > > - mutex_lock(&hci_cb_list_lock); > > - list_for_each_entry(cb, &hci_cb_list, list) { > > - if 
(cb->connect_cfm) > > - cb->connect_cfm(conn, status); > > + rcu_read_lock(); > > + list_for_each_entry_rcu(cb, &hci_cb_list, list) { > > + if (cb->connect_cfm) { > > + struct hci_cb cpy = *cb; > > + > > + /* Callback may block so release RCU read lock */ > > + rcu_read_unlock(); > > + cpy.connect_cfm(conn, status); > > + rcu_read_lock(); > > This looks like incorrect RCU usage > > [CPU 1] rcu_read_unlock() > [CPU 2] hci_unregister_cb(cb) > [CPU 2] hci_unregister_cb(next cb) > [CPU 1] rcu_read_lock() > [CPU 1] list_for_each_entry_rcu -> iterates to "next cb" not in list > > If all hci_cb weren't static, it'd also UAF (maybe it is for rfcomm?). > > > hci_le_create_big_complete_evt() also does this (and maybe crashes if > ev->status != 0 ?), so maybe it is simples to fix it. I guess you are right: it seems incorrect to have rcu_read_unlock/relock inside list_for_each_entry_rcu. That said, I wonder why the entry is not accessed via rcu_dereference. Anyway, the only alternatives I can think of are to copy the entries to a local list on the stack, which we can then walk with list_for_each_entry_safe, or to create some mechanism to defer actions that could otherwise block/sleep while holding rcu_read_lock. 
> > > > + } > > } > > - mutex_unlock(&hci_cb_list_lock); > > + rcu_read_unlock(); > > > > if (conn->connect_cfm_cb) > > conn->connect_cfm_cb(conn, status); > > @@ -2044,12 +2049,18 @@ static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason) > > { > > struct hci_cb *cb; > > > > - mutex_lock(&hci_cb_list_lock); > > + rcu_read_lock(); > > list_for_each_entry(cb, &hci_cb_list, list) { > > - if (cb->disconn_cfm) > > - cb->disconn_cfm(conn, reason); > > + if (cb->disconn_cfm) { > > + struct hci_cb cpy = *cb; > > + > > + /* Callback may block so release RCU read lock */ > > + rcu_read_unlock(); > > + cpy.disconn_cfm(conn, reason); > > + rcu_read_lock(); > > + } > > } > > - mutex_unlock(&hci_cb_list_lock); > > + rcu_read_unlock(); > > > > if (conn->disconn_cfm_cb) > > conn->disconn_cfm_cb(conn, reason); > > @@ -2065,12 +2076,18 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) > > > > encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00; > > > > - mutex_lock(&hci_cb_list_lock); > > + rcu_read_lock(); > > list_for_each_entry(cb, &hci_cb_list, list) { > > - if (cb->security_cfm) > > - cb->security_cfm(conn, status, encrypt); > > + if (cb->security_cfm) { > > + struct hci_cb cpy = *cb; > > + > > + /* Callback may block so release RCU read lock */ > > + rcu_read_unlock(); > > + cpy.security_cfm(conn, status, encrypt); > > + rcu_read_lock(); > > + } > > } > > - mutex_unlock(&hci_cb_list_lock); > > + rcu_read_unlock(); > > > > if (conn->security_cfm_cb) > > conn->security_cfm_cb(conn, status); > > @@ -2105,12 +2122,18 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) > > conn->sec_level = conn->pending_sec_level; > > } > > > > - mutex_lock(&hci_cb_list_lock); > > + rcu_read_lock(); > > list_for_each_entry(cb, &hci_cb_list, list) { > > - if (cb->security_cfm) > > - cb->security_cfm(conn, status, encrypt); > > + if (cb->security_cfm) { > > + struct hci_cb cpy = *cb; > > + > > + /* Callback may block so release 
RCU read lock */ > > + rcu_read_unlock(); > > + cpy.security_cfm(conn, status, encrypt); > > + rcu_read_lock(); > > + } > > } > > - mutex_unlock(&hci_cb_list_lock); > > + rcu_read_unlock(); > > > > if (conn->security_cfm_cb) > > conn->security_cfm_cb(conn, status); > > @@ -2120,12 +2143,18 @@ static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status) > > { > > struct hci_cb *cb; > > > > - mutex_lock(&hci_cb_list_lock); > > + rcu_read_lock(); > > list_for_each_entry(cb, &hci_cb_list, list) { > > - if (cb->key_change_cfm) > > - cb->key_change_cfm(conn, status); > > + if (cb->key_change_cfm) { > > + struct hci_cb cpy = *cb; > > + > > + /* Callback may block so release RCU read lock */ > > + rcu_read_unlock(); > > + cpy.key_change_cfm(conn, status); > > + rcu_read_lock(); > > + } > > } > > - mutex_unlock(&hci_cb_list_lock); > > + rcu_read_unlock(); > > } > > > > static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status, > > @@ -2133,12 +2162,18 @@ static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status, > > { > > struct hci_cb *cb; > > > > - mutex_lock(&hci_cb_list_lock); > > - list_for_each_entry(cb, &hci_cb_list, list) { > > - if (cb->role_switch_cfm) > > - cb->role_switch_cfm(conn, status, role); > > + rcu_read_lock(); > > + list_for_each_entry_rcu(cb, &hci_cb_list, list) { > > + if (cb->role_switch_cfm) { > > + struct hci_cb cpy = *cb; > > + > > + /* Callback may block so release RCU read lock */ > > + rcu_read_unlock(); > > + cpy.role_switch_cfm(conn, status, role); > > + rcu_read_lock(); > > + } > > } > > - mutex_unlock(&hci_cb_list_lock); > > + rcu_read_unlock(); > > } > > > > static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type) > > diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c > > index f9e19f9cb5a3..25d180d225c1 100644 > > --- a/net/bluetooth/hci_core.c > > +++ b/net/bluetooth/hci_core.c > > @@ -2993,9 +2993,7 @@ int hci_register_cb(struct hci_cb *cb) > > { > > BT_DBG("%p name 
%s", cb, cb->name); > > > > - mutex_lock(&hci_cb_list_lock); > > - list_add_tail(&cb->list, &hci_cb_list); > > - mutex_unlock(&hci_cb_list_lock); > > + list_add_tail_rcu(&cb->list, &hci_cb_list); > > > > return 0; > > } > > @@ -3005,9 +3003,8 @@ int hci_unregister_cb(struct hci_cb *cb) > > { > > BT_DBG("%p name %s", cb, cb->name); > > > > - mutex_lock(&hci_cb_list_lock); > > - list_del(&cb->list); > > - mutex_unlock(&hci_cb_list_lock); > > + list_del_rcu(&cb->list); > > + synchronize_rcu(); > > > > return 0; > > } > -- Luiz Augusto von Dentz