Hi Balakrishna, >>>>> We will collect the ramdump of BT controller when hardware error event >>>>> received before rebooting the HCI layer. Before restarting a subsystem >>>>> or a process running on a subsystem, it is often required to request >>>>> either a subsystem or a process to perform proper cache dump and >>>>> software failure reason into a memory buffer which application >>>>> processor can retrieve afterwards. SW developers can often provide >>>>> initial investigation by looking into that debugging information. >>>>> Signed-off-by: Balakrishna Godavarthi <bgodavar@xxxxxxxxxxxxxx> >>>>> --- >>>>> changes v2: >>>>> * entirely an newer approach handling with an work queue. >>>>> --- >>>>> drivers/bluetooth/hci_qca.c | 289 ++++++++++++++++++++++++++++++++++-- >>>>> 1 file changed, 278 insertions(+), 11 deletions(-) >>>>> diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c >>>>> index f036c8f98ea3..62b768bc32ec 100644 >>>>> --- a/drivers/bluetooth/hci_qca.c >>>>> +++ b/drivers/bluetooth/hci_qca.c >>>>> @@ -32,6 +32,7 @@ >>>>> #include <linux/clk.h> >>>>> #include <linux/debugfs.h> >>>>> #include <linux/delay.h> >>>>> +#include <linux/devcoredump.h> >>>>> #include <linux/device.h> >>>>> #include <linux/gpio/consumer.h> >>>>> #include <linux/mod_devicetable.h> >>>> don't we have some crashdump core facility that could be utilized here? >>> [Bala]: no i don't think so. that is reason calling devcoredump.h >> what is wrong with using devcoredump.h? > > [Bala]: using the devcoredump.h > >>>>> @@ -57,9 +58,10 @@ >>>>> /* Controller states */ >>>>> #define STATE_IN_BAND_SLEEP_ENABLED 1 >>>>> -#define IBS_WAKE_RETRANS_TIMEOUT_MS 100 >>>>> +#define IBS_WAKE_RETRANS_TIMEOUT_MS 100 >>>>> #define IBS_TX_IDLE_TIMEOUT_MS 2000 >>>>> -#define BAUDRATE_SETTLE_TIMEOUT_MS 300 >>>>> +#define BAUDRATE_SETTLE_TIMEOUT_MS 300 >>>>> +#define MEMDUMP_COLLECTION_TIMEOUT_MS 8000 >>>>> /* susclk rate */ >>>>> #define SUSCLK_RATE_32KHZ 32768 >>>>> @@ -67,6 +69,13 @@ >>>>> /* Controller debug log header */ >>>>> #define QCA_DEBUG_HANDLE 0x2EDC >>>>> +/* Controller dump header */ >>>>> +#define QCA_SSR_DUMP_HANDLE 0x0108 >>>>> +#define QCA_DUMP_PACKET_SIZE 255 >>>>> +#define QCA_LAST_SEQUENCE_NUM 0xFFFF >>>>> +#define QCA_CRASHBYTE_PACKET_LEN 1100 >>>>> +#define QCA_MEMDUMP_BYTE 0xFB >>>>> + >>>>> /* HCI_IBS transmit side sleep protocol states */ >>>>> enum tx_ibs_states { >>>>> HCI_IBS_TX_ASLEEP, >>>>> @@ -89,12 +98,41 @@ enum hci_ibs_clock_state_vote { >>>>> HCI_IBS_RX_VOTE_CLOCK_OFF, >>>>> }; >>>>> +/* Controller memory dump states */ >>>>> +enum qca_memdump_states { >>>>> + QCA_MEMDUMP_IDLE, >>>>> + QCA_MEMDUMP_COLLECTING, >>>>> + QCA_MEMDUMP_COLLECTED, >>>>> + QCA_MEMDUMP_TIMEOUT, >>>>> +}; >>>>> + >>>>> +struct qca_memdump_data { >>>>> + char *memdump_buf_head; >>>>> + char *memdump_buf_tail; >>>>> + u32 current_seq_no; >>>>> + u32 received_dump; >>>>> +}; >>>>> + >>>>> +struct qca_memdump_event_hdr { >>>>> + __u8 evt; >>>>> + __u8 plen; >>>>> + __u16 opcode; >>>>> + __u16 seq_no; >>>>> + __u8 reserved; >>>>> +} __packed; >>>>> + >>>>> + >>>>> +struct qca_dump_size { >>>>> + u32 dump_size; >>>>> +} __packed; >>>>> + >>>>> struct qca_data { >>>>> struct hci_uart *hu; >>>>> struct sk_buff *rx_skb; >>>>> struct sk_buff_head txq; >>>>> - struct sk_buff_head tx_wait_q; /* HCI_IBS wait queue */ >>>>> - spinlock_t hci_ibs_lock; /* HCI_IBS state lock */ >>>>> + struct sk_buff_head tx_wait_q; /* HCI_IBS wait queue */ >>>>> + struct sk_buff_head rx_memdump_q; /* Memdump wait queue */ >>>>> + spinlock_t hci_ibs_lock; /* HCI_IBS state lock */ >>>>> u8 tx_ibs_state; /* HCI_IBS transmit side power state*/ >>>>> u8 rx_ibs_state; /* HCI_IBS receive side power state */ >>>>> bool tx_vote; /* Clock must be on for TX */ >>>>> @@ -103,12 +141,17 @@ struct qca_data { >>>>> u32 tx_idle_delay; >>>>> struct timer_list wake_retrans_timer; >>>>> u32 wake_retrans; >>>>> + struct timer_list memdump_timer; >>>>> + u32 memdump_delay; >>>>> struct workqueue_struct *workqueue; >>>>> struct work_struct ws_awake_rx; >>>>> struct work_struct ws_awake_device; >>>>> struct work_struct ws_rx_vote_off; >>>>> struct work_struct ws_tx_vote_off; >>>>> + struct work_struct ctrl_memdump_evt; >>>>> + struct qca_memdump_data *qca_memdump; >>>>> unsigned long flags; >>>>> + enum qca_memdump_states memdump_state; >>>>> /* For debugging purpose */ >>>>> u64 ibs_sent_wacks; >>>>> @@ -173,6 +216,7 @@ struct qca_serdev { >>>>> static int qca_power_setup(struct hci_uart *hu, bool on); >>>>> static void qca_power_shutdown(struct hci_uart *hu); >>>>> static int qca_power_off(struct hci_dev *hdev); >>>>> +static void qca_controller_memdump(struct work_struct *work); >>>>> static void __serial_clock_on(struct tty_struct *tty) >>>>> { >>>>> @@ -446,6 +490,21 @@ static void hci_ibs_wake_retrans_timeout(struct timer_list *t) >>>>> hci_uart_tx_wakeup(hu); >>>>> } >>>>> +static void hci_memdump_timeout(struct timer_list *t) >>>>> +{ >>>>> + struct qca_data *qca = from_timer(qca, t, tx_idle_timer); >>>>> + struct hci_uart *hu = qca->hu; >>>>> + struct qca_memdump_data *qca_memdump = qca->qca_memdump; >>>>> + char *memdump_buf = qca_memdump->memdump_buf_tail; >>>>> + >>>>> + bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout"); >>>>> + kfree(memdump_buf); >>>>> + kfree(qca_memdump); >>>>> + qca->memdump_state = QCA_MEMDUMP_TIMEOUT; >>>>> + del_timer(&qca->memdump_timer); >>>>> + cancel_work_sync(&qca->ctrl_memdump_evt); >>>>> +} >>>>> + >>>>> /* Initialize protocol */ >>>>> static int qca_open(struct hci_uart *hu) >>>>> { >>>>> @@ -461,6 +520,7 @@ static int qca_open(struct hci_uart *hu) >>>>> skb_queue_head_init(&qca->txq); >>>>> skb_queue_head_init(&qca->tx_wait_q); >>>>> + skb_queue_head_init(&qca->rx_memdump_q); >>>>> spin_lock_init(&qca->hci_ibs_lock); >>>>> qca->workqueue = alloc_ordered_workqueue("qca_wq", 0); >>>>> if (!qca->workqueue) { >>>>> @@ -473,6 +533,7 @@ static int qca_open(struct hci_uart *hu) >>>>> INIT_WORK(&qca->ws_awake_device, qca_wq_awake_device); >>>>> INIT_WORK(&qca->ws_rx_vote_off, qca_wq_serial_rx_clock_vote_off); >>>>> INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off); >>>>> + INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump); >>>>> qca->hu = hu; >>>>> @@ -500,7 +561,7 @@ static int qca_open(struct hci_uart *hu) >>>>> qca->tx_votes_off = 0; >>>>> qca->rx_votes_on = 0; >>>>> qca->rx_votes_off = 0; >>>>> - >>>>> + qca->memdump_state = QCA_MEMDUMP_IDLE; >>>>> hu->priv = qca; >>>>> if (hu->serdev) { >>>>> @@ -528,6 +589,9 @@ static int qca_open(struct hci_uart *hu) >>>>> timer_setup(&qca->tx_idle_timer, hci_ibs_tx_idle_timeout, 0); >>>>> qca->tx_idle_delay = IBS_TX_IDLE_TIMEOUT_MS; >>>>> + timer_setup(&qca->memdump_timer, hci_memdump_timeout, 0); >>>>> + qca->memdump_delay = MEMDUMP_COLLECTION_TIMEOUT_MS; >>>>> + >>>>> BT_DBG("HCI_UART_QCA open, tx_idle_delay=%u, wake_retrans=%u", >>>>> qca->tx_idle_delay, qca->wake_retrans); >>>>> @@ -605,8 +669,10 @@ static int qca_close(struct hci_uart *hu) >>>>> skb_queue_purge(&qca->tx_wait_q); >>>>> skb_queue_purge(&qca->txq); >>>>> + skb_queue_purge(&qca->rx_memdump_q); >>>>> del_timer(&qca->tx_idle_timer); >>>>> del_timer(&qca->wake_retrans_timer); >>>>> + del_timer(&qca->memdump_timer); >>>>> destroy_workqueue(qca->workqueue); >>>>> qca->hu = NULL; >>>>> @@ -866,6 +932,141 @@ static int qca_recv_acl_data(struct hci_dev *hdev, struct sk_buff *skb) >>>>> return hci_recv_frame(hdev, skb); >>>>> } >>>>> +static void qca_controller_memdump(struct work_struct *work) >>>>> +{ >>>>> + struct qca_data *qca = container_of(work, struct qca_data, >>>>> + ctrl_memdump_evt); >>>>> + struct hci_uart *hu = qca->hu; >>>>> + struct sk_buff *skb; >>>>> + struct qca_memdump_event_hdr *cmd_hdr; >>>>> + struct qca_memdump_data *qca_memdump = qca->qca_memdump; >>>>> + struct qca_dump_size *dump; >>>>> + char *memdump_buf; >>>>> + char nullBuff[QCA_DUMP_PACKET_SIZE] = { 0 }; >>>>> + u16 opcode, seq_no; >>>>> + u32 dump_size; >>>>> + >>>>> + while ((skb = skb_dequeue(&qca->rx_memdump_q))) { >>>>> + >>>>> + if (!qca_memdump) { >>>>> + qca_memdump = kzalloc(sizeof(struct qca_memdump_data), >>>>> + GFP_ATOMIC); >>>>> + if (!qca_memdump) >>>>> + return; >>>>> + >>>>> + qca->qca_memdump = qca_memdump; >>>>> + } >>>>> + >>>>> + qca->memdump_state = QCA_MEMDUMP_COLLECTING; >>>>> + cmd_hdr = (void *) skb->data; >>>>> + opcode = __le16_to_cpu(cmd_hdr->opcode); >>>>> + seq_no = __le16_to_cpu(cmd_hdr->seq_no); >>>>> + skb_pull(skb, sizeof(struct qca_memdump_event_hdr)); >>>>> + >>>>> + if (!seq_no) { >>>>> + >>>>> + /* This is the first frame of memdump packet from >>>>> + * the controller, Disable IBS to recevie dump >>>>> + * with out any interruption, ideally time required for >>>>> + * the controller to send the dump is 8 seconds. let us >>>>> + * start timer to handle this asynchronous activity. >>>>> + */ >>>>> + clear_bit(STATE_IN_BAND_SLEEP_ENABLED, &qca->flags); >>>>> + >>>>> + dump = (void *) skb->data; >>>>> + dump_size = __le32_to_cpu(dump->dump_size); >>>>> + if (!(dump_size)) { >>>>> + bt_dev_err(hu->hdev, "QCA invalid memdump size"); >>>>> + kfree_skb(skb); >>>>> + return; >>>>> + } >>>>> + >>>>> + bt_dev_info(hu->hdev, "QCA: collecting memdump started" >>>>> + "of size:%u", dump_size); >>>>> + mod_timer(&qca->memdump_timer, (jiffies + >>>>> + msecs_to_jiffies(qca->memdump_delay))); >>>>> + >>>>> + skb_pull(skb, sizeof(dump_size)); >>>>> + memdump_buf = vmalloc(dump_size); >>>>> + qca_memdump->memdump_buf_head = memdump_buf; >>>>> + qca_memdump->memdump_buf_tail = memdump_buf; >>>>> + } >>>>> + >>>>> + memdump_buf = qca_memdump->memdump_buf_tail; >>>>> + >>>>> + /* If sequence no 0 is missed then there is no point in >>>>> + * accepting the other sequences. >>>>> + */ >>>>> + if (!memdump_buf) { >>>>> + bt_dev_err(hu->hdev, "QCA: Discarding other packets"); >>>>> + kfree(qca_memdump); >>>>> + kfree_skb(skb); >>>>> + qca->qca_memdump = NULL; >>>>> + return; >>>>> + } >>>>> + >>>>> + /* There could be chance of missing some packets from >>>>> + * the controller. In such cases let us store the dummy >>>>> + * packets in the buffer. >>>>> + */ >>>>> + while ((seq_no > qca_memdump->current_seq_no + 1) && >>>>> + seq_no != QCA_LAST_SEQUENCE_NUM) { >>>>> + bt_dev_err(hu->hdev, "QCA controller missed packet:%d", >>>>> + qca_memdump->current_seq_no); >>>>> + memcpy(memdump_buf, nullBuff, QCA_DUMP_PACKET_SIZE); >>>>> + memdump_buf = memdump_buf + QCA_DUMP_PACKET_SIZE; >>>>> + qca_memdump->received_dump += QCA_DUMP_PACKET_SIZE; >>>>> + qca_memdump->current_seq_no++; >>>>> + } >>>>> + >>>>> + memcpy(memdump_buf, (unsigned char *) skb->data, skb->len); >>>>> + memdump_buf = memdump_buf + skb->len; >>>>> + qca_memdump->memdump_buf_tail = memdump_buf; >>>>> + qca_memdump->current_seq_no = seq_no + 1; >>>>> + qca_memdump->received_dump += skb->len; >>>>> + qca->qca_memdump = qca_memdump; >>>>> + kfree_skb(skb); >>>>> + if (seq_no == QCA_LAST_SEQUENCE_NUM) { >>>>> + bt_dev_info(hu->hdev, "QCA writing crash dump of size %d bytes", >>>>> + qca_memdump->received_dump); >>>>> + memdump_buf = qca_memdump->memdump_buf_head; >>>>> + dev_coredumpv(&hu->serdev->dev, memdump_buf, >>>>> + qca_memdump->received_dump, GFP_KERNEL); >>>>> + // Revisit for free(memdump) if needed. >>>>> + del_timer(&qca->memdump_timer); >>>>> + kfree(qca->qca_memdump); >>>>> + qca->qca_memdump = NULL; >>>>> + qca->memdump_state = QCA_MEMDUMP_COLLECTED; >>>>> + } >>>>> + } >>>>> + >>>>> +} >>>>> + >>>>> +int qca_controller_memdump_event(struct hci_dev *hdev, struct sk_buff *skb) >>>>> +{ >>>>> + struct hci_uart *hu = hci_get_drvdata(hdev); >>>>> + struct qca_data *qca = hu->priv; >>>>> + >>>>> + skb_queue_tail(&qca->rx_memdump_q, skb); >>>>> + queue_work(qca->workqueue, &qca->ctrl_memdump_evt); >>>>> + >>>>> + return 0; >>>>> +} >>>>> + >>>>> +static int qca_recv_event_data(struct hci_dev *hdev, struct sk_buff *skb) >>>>> +{ >>>>> + /* We receive chip memory dump as an event packet, With a dedicated >>>>> + * handler followed by a hardware error event. When this event is >>>>> + * received we store dump into a file before closing hci. This >>>>> + * dump will help in triaging the issues. >>>>> + */ >>>>> + if ((skb->data[0] == HCI_VENDOR_PKT) && >>>>> + (get_unaligned_be16((skb->data) + 2) == QCA_SSR_DUMP_HANDLE)) >>>> The (skb->data) is pointless, just do skb->data. >>> [Bala]: will update. >>>>> + return qca_controller_memdump_event(hdev, skb); >>>>> + >>>>> + return hci_recv_frame(hdev, skb); >>>>> +} >>>>> + >>>>> #define QCA_IBS_SLEEP_IND_EVENT \ >>>>> .type = HCI_IBS_SLEEP_IND, \ >>>>> .hlen = 0, \ >>>>> @@ -888,12 +1089,12 @@ static int qca_recv_acl_data(struct hci_dev *hdev, struct sk_buff *skb) >>>>> .maxlen = HCI_MAX_IBS_SIZE >>>>> static const struct h4_recv_pkt qca_recv_pkts[] = { >>>>> - { H4_RECV_ACL, .recv = qca_recv_acl_data }, >>>>> - { H4_RECV_SCO, .recv = hci_recv_frame }, >>>>> - { H4_RECV_EVENT, .recv = hci_recv_frame }, >>>>> - { QCA_IBS_WAKE_IND_EVENT, .recv = qca_ibs_wake_ind }, >>>>> - { QCA_IBS_WAKE_ACK_EVENT, .recv = qca_ibs_wake_ack }, >>>>> - { QCA_IBS_SLEEP_IND_EVENT, .recv = qca_ibs_sleep_ind }, >>>>> + { H4_RECV_ACL, .recv = qca_recv_acl_data }, >>>>> + { H4_RECV_SCO, .recv = hci_recv_frame }, >>>>> + { H4_RECV_EVENT, .recv = qca_recv_event_data }, >>>> call it qca_recv_event and don't realign the whole table. >>> [Bala]: will update. >>>>> + { QCA_IBS_WAKE_IND_EVENT, .recv = qca_ibs_wake_ind }, >>>>> + { QCA_IBS_WAKE_ACK_EVENT, .recv = qca_ibs_wake_ack }, >>>>> + { QCA_IBS_SLEEP_IND_EVENT, .recv = qca_ibs_sleep_ind }, >>>>> }; >>>>> static int qca_recv(struct hci_uart *hu, const void *data, int count) >>>>> @@ -1114,6 +1315,68 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) >>>>> return 0; >>>>> } >>>>> +static int qca_send_crashbuffer(struct hci_uart *hu) >>>>> +{ >>>>> + struct qca_data *qca = hu->priv; >>>>> + struct sk_buff *skb; >>>>> + unsigned char buf[QCA_CRASHBYTE_PACKET_LEN]; >>>>> + >>>>> + bt_dev_dbg(hu->hdev, "sending soc crash buffer to controller"); >>>>> + >>>>> + /* We forcefully crash the controller, by sending 0xfb byte for >>>>> + * 1024 times. We also might have chance of losing data, To be >>>>> + * on safer side we send 1100 bytes to the SoC. >>>>> + */ >>>>> + memset(buf, QCA_MEMDUMP_BYTE, QCA_CRASHBYTE_PACKET_LEN); >>>>> + >>>>> + skb = bt_skb_alloc(QCA_CRASHBYTE_PACKET_LEN, GFP_KERNEL); >>>>> + if (!skb) { >>>>> + bt_dev_err(hu->hdev, "Failed to allocate memory for skb packet"); >>>>> + return -ENOMEM; >>>>> + } >>>>> + >>>>> + skb_put_data(skb, buf, QCA_CRASHBYTE_PACKET_LEN); >>>>> + hci_skb_pkt_type(skb) = HCI_COMMAND_PKT; >>>>> + >>>>> + skb_queue_tail(&qca->txq, skb); >>>>> + hci_uart_tx_wakeup(hu); >>>>> + >>>>> + return 0; >>>>> +} >>>>> + >>>>> +static void qca_hw_error(struct hci_dev *hdev, u8 code) >>>>> +{ >>>>> + struct hci_uart *hu = hci_get_drvdata(hdev); >>>>> + struct qca_data *qca = hu->priv; >>>>> + >>>>> + bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state); >>>>> + >>>>> + if (qca->memdump_state == QCA_MEMDUMP_IDLE) { >>>>> + /* If hardware error event received for other than QCA >>>>> + * soc memory dump event, then we need to crash the SOC >>>>> + * and wait here for 8 seconds to get the dump packets. >>>>> + * This will block main thread to be on hold until we >>>>> + * collect dump. >>>>> + */ >>>>> + mod_timer(&qca->memdump_timer, >>>>> + (jiffies + msecs_to_jiffies(qca->memdump_delay))); >>>> Why a timer that then ends up in interrupt context? >>> [Bala]: some times we only receive hardware error from the chip, so we are sending an special buffer to request the >>> chip for memory dump. we are using an timer to check whether the data memdump is collected. if the timer >>> over flows we are cleaning up all the assigned memory and unblock the main thread, >>> in short this timer is a watch dog timer for the memory dump collection. >> I saw that you used a bunch of timers actually. My question is why are >> these not delayed_wq instead? I wonder if we really want to keep using >> timers and handling interrupt context locking if we could move >> everything to wq instead. > > [Bala]: Let me brief you the requirement. > This patch is used to collect the memory dump received from the chip and store it into a file. > We receive chip memory dump in two cases > 1. When chip goes into the bad state it sends the dump followed by and hardware error event. > 2. Some time when we send some garbage or un recognized command to chip. > it will respond with a hardware error event. So we send an 0xfb bytes to the chip requesting it to send the memory dump. > > In both cases, chip will go into the bad state once it sends the dump. so we need to turn OFF and ON chip and perform the init procedure. > So keeping the above cases in the view, we register an handle for hardware error event. So that in both the cases hw error routine will execute. > > As the dump as some specific headers we will queue the received data into work queue. > We are blocking the main thread until the total dump is stored into the file. > Here is the answer for the question, yes we check the status of the work queue but for handling case 2 we require timers. > But let us assume the wq is taking more time than expected. Then we also need a timer. > we are using timer so that we will not block the main thread indefinitely. > They are some cases where when we request a memory dump, after some packet sent by the chip. > it may go into a bad state where it will not pump the remaining dump, so we are using a timer to unblock the main thread. > so during timeout, we are restarting the flags to unblock the main thread. > > >>>>> + qca_send_crashbuffer(hu); >>>>> + >>>>> + while (qca->memdump_state != QCA_MEMDUMP_COLLECTED && >>>>> + qca->memdump_state != QCA_MEMDUMP_TIMEOUT) >>>>> + ; >>>> What is this? Busy loop? I rather not do that. >>> [Bala]: this busy required to collected the dump from the chip before we close an reopen the hci. >>> >> + } else if (qca->memdump_state == QCA_MEMDUMP_COLLECTING) { >> Why not use some wakeup handling here. I am really against any kind of >> busy loops in a driver. > [Bala]: this busy loop is require. pls refer my comments to the above query. > this will help in mainly in case 2, where we will block the main thread until we have dump collected. > > let us assume that we received an hardware error event, hw error routine we execute which sends an > special byte to the chip requesting for memory dump. it starts sending the dump, but we move forward and turn off the > chip so we will miss the dump. so here the busy loop is to wait until we have dump. > but the we are not waiting indefinitely we have timer to unblock them. I do not see how any of this is correct. Can you not use some wakeup helper like everybody else does. I also do not see how these two threads are correctly protected. It looks really hackish to me. Regards Marcel