On Mon, Apr 29, 2019 at 04:21:31PM -0700, Matthias Kaehlcke wrote: > Firmware download to the WCN3990 often fails with a 'TLV response size > mismatch' error: > > [ 133.064659] Bluetooth: hci0: setting up wcn3990 > [ 133.489150] Bluetooth: hci0: QCA controller version 0x02140201 > [ 133.495245] Bluetooth: hci0: QCA Downloading qca/crbtfw21.tlv > [ 133.507214] Bluetooth: hci0: QCA TLV response size mismatch > [ 133.513265] Bluetooth: hci0: QCA Failed to download patch (-84) > > This is caused by a vendor event that corresponds to an earlier command > to change the baudrate. The event is not processed in the context of the > baudrate change and is later interpreted as response to the firmware > download command (which is also a vendor command), but the driver detects > that the event doesn't have the expected amount of associated data. > > More details: > > For the WCN3990 the vendor command for a baudrate change isn't sent as > synchronous HCI command, because the controller sends the corresponding > vendor event with the new baudrate. The event is received and decoded > after the baudrate change of the host port. > > Identify the 'unused' event when it is received and don't add it to > the queue of RX frames. > > Signed-off-by: Matthias Kaehlcke <mka@xxxxxxxxxxxx> > --- > Changes in v3: > - rebased on latest bluetooth-next/master > - removed barrier calls again, bit routines include barriers > > Changes in v2: > - make QCA_DROP_VENDOR_EVENT an enum value and don't use BIT() > - free skb in qca_recv_event() > - add barriers to ensure qca_recv_event() sees updated flags > - return -ETIMEDOUT instead of -EPROTO if the vendor event isn't > received in time > --- > drivers/bluetooth/hci_qca.c | 56 +++++++++++++++++++++++++++++++++++-- > 1 file changed, 53 insertions(+), 3 deletions(-) > > diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c > index 57322c42bb2d..5b57d897d8b5 100644 > --- a/drivers/bluetooth/hci_qca.c > +++ b/drivers/bluetooth/hci_qca.c > @@ -30,6 +30,7 @@ > > #include <linux/kernel.h> > #include <linux/clk.h> > +#include <linux/completion.h> > #include <linux/debugfs.h> > #include <linux/delay.h> > #include <linux/device.h> > @@ -66,6 +67,7 @@ > > enum qca_flags { > QCA_IBS_ENABLED, > + QCA_DROP_VENDOR_EVENT, > }; > > /* HCI_IBS transmit side sleep protocol states */ > @@ -110,6 +112,7 @@ struct qca_data { > struct work_struct ws_rx_vote_off; > struct work_struct ws_tx_vote_off; > unsigned long flags; > + struct completion drop_ev_comp; > > /* For debugging purpose */ > u64 ibs_sent_wacks; > @@ -491,6 +494,7 @@ static int qca_open(struct hci_uart *hu) > INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off); > > qca->hu = hu; > + init_completion(&qca->drop_ev_comp); > > /* Assume we start with both sides asleep -- extra wakes OK */ > qca->tx_ibs_state = HCI_IBS_TX_ASLEEP; > @@ -885,6 +889,35 @@ static int qca_recv_acl_data(struct hci_dev *hdev, struct sk_buff *skb) > return hci_recv_frame(hdev, skb); > } > > +static int qca_recv_event(struct hci_dev *hdev, struct sk_buff *skb) > +{ > + struct hci_uart *hu = hci_get_drvdata(hdev); > + struct qca_data *qca = hu->priv; > + > + if (test_bit(QCA_DROP_VENDOR_EVENT, &qca->flags)) { > + struct hci_event_hdr *hdr = (void *)skb->data; > + > + /* For the WCN3990 the vendor command for a baudrate change > + * isn't sent as synchronous HCI command, because the > + * controller sends the corresponding vendor event with the > + * new baudrate. The event is received and properly decoded > + * after changing the baudrate of the host port. It needs to > + * be dropped, otherwise it can be misinterpreted as > + * response to a later firmware download command (also a > + * vendor command). > + */ > + > + if (hdr->evt == HCI_EV_VENDOR) > + complete(&qca->drop_ev_comp); > + > + kfree(skb); > + > + return 0; > + } > + > + return hci_recv_frame(hdev, skb); > +} > + > #define QCA_IBS_SLEEP_IND_EVENT \ > .type = HCI_IBS_SLEEP_IND, \ > .hlen = 0, \ > @@ -909,7 +942,7 @@ static int qca_recv_acl_data(struct hci_dev *hdev, struct sk_buff *skb) > static const struct h4_recv_pkt qca_recv_pkts[] = { > { H4_RECV_ACL, .recv = qca_recv_acl_data }, > { H4_RECV_SCO, .recv = hci_recv_frame }, > - { H4_RECV_EVENT, .recv = hci_recv_frame }, > + { H4_RECV_EVENT, .recv = qca_recv_event }, > { QCA_IBS_WAKE_IND_EVENT, .recv = qca_ibs_wake_ind }, > { QCA_IBS_WAKE_ACK_EVENT, .recv = qca_ibs_wake_ack }, > { QCA_IBS_SLEEP_IND_EVENT, .recv = qca_ibs_sleep_ind }, > @@ -1104,6 +1137,7 @@ static int qca_check_speeds(struct hci_uart *hu) > static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) > { > unsigned int speed, qca_baudrate; > + struct qca_data *qca = hu->priv; > int ret = 0; > > if (speed_type == QCA_INIT_SPEED) { > @@ -1120,8 +1154,11 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) > /* Disable flow control for wcn3990 to deassert RTS while > * changing the baudrate of chip and host. > */ > - if (qca_is_wcn399x(soc_type)) > + if (qca_is_wcn399x(soc_type)) { > hci_uart_set_flow_control(hu, true); > + reinit_completion(&qca->drop_ev_comp); > + set_bit(QCA_DROP_VENDOR_EVENT, &qca->flags); > + } > > qca_baudrate = qca_get_baudrate_value(speed); > bt_dev_dbg(hu->hdev, "Set UART speed to %d", speed); > @@ -1132,8 +1169,21 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) > host_set_baudrate(hu, speed); > > error: > - if (qca_is_wcn399x(soc_type)) > + if (qca_is_wcn399x(soc_type)) { > hci_uart_set_flow_control(hu, false); > + > + /* Wait for the controller to send the vendor event > + * for the baudrate change command. > + */ > + if (!wait_for_completion_timeout(&qca->drop_ev_comp, > + msecs_to_jiffies(100))) { > + bt_dev_err(hu->hdev, > + "Failed to change controller baudrate\n"); > + ret = -ETIMEDOUT; > + } > + > + clear_bit(QCA_DROP_VENDOR_EVENT, &qca->flags); > + } > } > > return ret; The alternative to this patch could be a short delay in the right place, as commented on v2 (https://lore.kernel.org/patchwork/patch/1048463/#1252739). This series has been floating around for two months and hasn't received any maintainer attention (or at least no replies). I don't pretend it's a pretty solution, if maintainers prefer we can go for the simpler option to add a delay plus a comment explaining why it is needed. In any case let's do something, firmware download has been broken since forever even though possible fixes are available :( Thanks Matthias