Hi Manish, > During command status or command complete event, the controller may set > ncmd=0 indicating that it is not accepting any more commands. In such a > case, host holds off sending any more commands to the controller. If the > controller doesn't recover from such condition, host will wait forever, > until the user decides that the Bluetooth is broken and may power cycles > the Bluetooth. > > This patch triggers the hardware error to reset the controller and > driver when it gets into such state as there is no other wat out. > > Reviewed-by: Abhishek Pandit-Subedi <abhishekpandit@xxxxxxxxxxxx> > Signed-off-by: Manish Mandlik <mmandlik@xxxxxxxxxx> > --- > > Changes in v3: > - Restructure ncmd_timer scheduling in hci_event.c > - Cancel delayed work in hci_dev_do_close > - Do not inject hw error during HCI_INIT > - Update comment, add log message while injecting hw error > > Changes in v2: > - Emit the hardware error when ncmd=0 occurs > > include/net/bluetooth/hci.h | 1 + > include/net/bluetooth/hci_core.h | 1 + > net/bluetooth/hci_core.c | 22 ++++++++++++++++++++++ > net/bluetooth/hci_event.c | 22 ++++++++++++++++++---- > 4 files changed, 42 insertions(+), 4 deletions(-) > > diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h > index ea4ae551c426..c4b0650fb9ae 100644 > --- a/include/net/bluetooth/hci.h > +++ b/include/net/bluetooth/hci.h > @@ -339,6 +339,7 @@ enum { > #define HCI_PAIRING_TIMEOUT msecs_to_jiffies(60000) /* 60 seconds */ > #define HCI_INIT_TIMEOUT msecs_to_jiffies(10000) /* 10 seconds */ > #define HCI_CMD_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ > +#define HCI_NCMD_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ > #define HCI_ACL_TX_TIMEOUT msecs_to_jiffies(45000) /* 45 seconds */ > #define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ > #define HCI_POWER_OFF_TIMEOUT msecs_to_jiffies(5000) /* 5 seconds */ > diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h > index ebdd4afe30d2..f14692b39fd5 100644 > --- a/include/net/bluetooth/hci_core.h > +++ b/include/net/bluetooth/hci_core.h > @@ -470,6 +470,7 @@ struct hci_dev { > struct delayed_work service_cache; > > struct delayed_work cmd_timer; > + struct delayed_work ncmd_timer; > > struct work_struct rx_work; > struct work_struct cmd_work; > diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c > index b0d9c36acc03..37789c5d0579 100644 > --- a/net/bluetooth/hci_core.c > +++ b/net/bluetooth/hci_core.c > @@ -1723,6 +1723,7 @@ int hci_dev_do_close(struct hci_dev *hdev) > } > > cancel_delayed_work(&hdev->power_off); > + cancel_delayed_work(&hdev->ncmd_timer); > > hci_request_cancel_all(hdev); > hci_req_sync_lock(hdev); > @@ -2769,6 +2770,24 @@ static void hci_cmd_timeout(struct work_struct *work) > queue_work(hdev->workqueue, &hdev->cmd_work); > } > > +/* HCI ncmd timer function */ > +static void hci_ncmd_timeout(struct work_struct *work) > +{ > + struct hci_dev *hdev = container_of(work, struct hci_dev, > + ncmd_timer.work); > + > + bt_dev_err(hdev, "Controller not accepting commands anymore: ncmd = 0"); > + > + /* No hardware error event needs to be injected if the ncmd timer > + * triggers during HCI_INIT. > + */ while the patch looks good, I would be more strongly with my wording here. /* During HCI_INIT phase no events can be injected if the ncmd timer * triggers since the procedure has its own timeout handling. */ > + if (test_bit(HCI_INIT, &hdev->flags)) > + return; > + > + /* This is an irrecoverable state, inject hardware error event */ > + hci_reset_dev(hdev); > +} > + > struct oob_data *hci_find_remote_oob_data(struct hci_dev *hdev, > bdaddr_t *bdaddr, u8 bdaddr_type) > { > @@ -3831,6 +3850,7 @@ struct hci_dev *hci_alloc_dev(void) > init_waitqueue_head(&hdev->suspend_wait_q); > > INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout); > + INIT_DELAYED_WORK(&hdev->ncmd_timer, hci_ncmd_timeout); > > hci_request_setup(hdev); > > @@ -4068,6 +4088,8 @@ int hci_reset_dev(struct hci_dev *hdev) > hci_skb_pkt_type(skb) = HCI_EVENT_PKT; > skb_put_data(skb, hw_err, 3); > > + bt_dev_err(hdev, "Injecting HCI hardware error event"); > + > /* Send Hardware Error to upper stack */ > return hci_recv_frame(hdev, skb); > } > diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c > index cf2f4a0abdbd..8cd4bcf5dd00 100644 > --- a/net/bluetooth/hci_event.c > +++ b/net/bluetooth/hci_event.c > @@ -3635,8 +3635,15 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, > if (*opcode != HCI_OP_NOP) > cancel_delayed_work(&hdev->cmd_timer); > > - if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) > - atomic_set(&hdev->cmd_cnt, 1); > + if (!test_bit(HCI_RESET, &hdev->flags)) { > + if (ev->ncmd) { > + cancel_delayed_work(&hdev->ncmd_timer); > + atomic_set(&hdev->cmd_cnt, 1); > + } else { > + schedule_delayed_work(&hdev->ncmd_timer, > + HCI_NCMD_TIMEOUT); > + } > + } > > hci_req_cmd_complete(hdev, *opcode, *status, req_complete, > req_complete_skb); > @@ -3740,8 +3747,15 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb, > if (*opcode != HCI_OP_NOP) > cancel_delayed_work(&hdev->cmd_timer); > > - if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) > - atomic_set(&hdev->cmd_cnt, 1); > + if (!test_bit(HCI_RESET, &hdev->flags)) { > + if (ev->ncmd) { > + cancel_delayed_work(&hdev->ncmd_timer); > + atomic_set(&hdev->cmd_cnt, 1); > + } else { > + schedule_delayed_work(&hdev->ncmd_timer, > + HCI_NCMD_TIMEOUT); > + } > + } > Since the code is getting a bit more complex now, I would prefer that in a follow up patch we provide a common helper function for this. static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev, bool is_nop) { if (!is_nop) cancel_delayed_work(&hdev->cmd_timer); if (!test_bit(HCI_RESET, ..) { .. } } And then you can just do: handle_cmd_cnt_and_timer(hdev, *opcode == HCI_OP_NOP); Or something similar to this. Regards Marcel