syzbot is reporting attempt to schedule hdev->cmd_work work from system_wq WQ into hdev->workqueue WQ which is under draining operation [1], for commit c8efcc2589464ac7 ("workqueue: allow chained queueing during destruction") does not allow such operation. The check introduced by commit 877afadad2dce8aa ("Bluetooth: When HCI work queue is drained, only queue chained work") was incomplete. Use hdev->workqueue WQ when queuing hdev->{cmd,ncmd}_timer works because hci_{cmd,ncmd}_timeout() calls queue_work(hdev->workqueue). Also, protect the queuing operation with RCU read lock in order to avoid calling queue_delayed_work() after cancel_delayed_work() completed. Link: https://syzkaller.appspot.com/bug?extid=243b7d89777f90f7613b [1] Reported-by: syzbot <syzbot+243b7d89777f90f7613b@xxxxxxxxxxxxxxxxxxxxxxxxx> Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx> Fixes: 877afadad2dce8aa ("Bluetooth: When HCI work queue is drained, only queue chained work") --- This is a difficult to trigger race condition, and therefore reproducer is not available. Please do logical check in addition to automated testing. net/bluetooth/hci_core.c | 15 +++++++++++++-- net/bluetooth/hci_event.c | 6 ++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b3a5a3cc9372..9873d2e67988 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -597,6 +597,15 @@ static int hci_dev_do_reset(struct hci_dev *hdev) /* Cancel these to avoid queueing non-chained pending work */ hci_dev_set_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); + /* Wait for + * + * if (!hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) + * queue_delayed_work(&hdev->{cmd,ncmd}_timer) + * + * inside RCU section to see the flag or complete scheduling. + */ + synchronize_rcu(); + /* Explicitly cancel works in case scheduled after setting the flag. */ cancel_delayed_work(&hdev->cmd_timer); cancel_delayed_work(&hdev->ncmd_timer); @@ -4056,12 +4065,14 @@ static void hci_cmd_work(struct work_struct *work) if (res < 0) __hci_cmd_sync_cancel(hdev, -res); + rcu_read_lock(); if (test_bit(HCI_RESET, &hdev->flags) || hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) cancel_delayed_work(&hdev->cmd_timer); else - schedule_delayed_work(&hdev->cmd_timer, - HCI_CMD_TIMEOUT); + queue_delayed_work(hdev->workqueue, &hdev->cmd_timer, + HCI_CMD_TIMEOUT); + rcu_read_unlock(); } else { skb_queue_head(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6643c9c20fa4..d6f0e6ca0e7e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3766,16 +3766,18 @@ static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev, u8 ncmd) { cancel_delayed_work(&hdev->cmd_timer); + rcu_read_lock(); if (!test_bit(HCI_RESET, &hdev->flags)) { if (ncmd) { cancel_delayed_work(&hdev->ncmd_timer); atomic_set(&hdev->cmd_cnt, 1); } else { if (!hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) - schedule_delayed_work(&hdev->ncmd_timer, - HCI_NCMD_TIMEOUT); + queue_delayed_work(hdev->workqueue, &hdev->ncmd_timer, + HCI_NCMD_TIMEOUT); } } + rcu_read_unlock(); } static u8 hci_cc_le_read_buffer_size_v2(struct hci_dev *hdev, void *data, -- 2.18.4