Hi En-Wei, On Sun, Dec 1, 2024 at 9:30 PM En-Wei Wu <en-wei.wu@xxxxxxxxxxxxx> wrote: > > The WCN7851 (0489:e0f3) Bluetooth controller supports firmware crash dump > collection through devcoredump. During this process, the crash dump data > is queued to a dump queue as skb for further processing. > > A NULL pointer dereference occurs in skb_dequeue() when processing the > dump queue due to improper return value handling: > > [ 93.672166] Bluetooth: hci0: ACL memdump size(589824) > > [ 93.672475] BUG: kernel NULL pointer dereference, address: 0000000000000008 > [ 93.672517] Workqueue: hci0 hci_devcd_rx [bluetooth] > [ 93.672598] RIP: 0010:skb_dequeue+0x50/0x80 > > The issue stems from handle_dump_pkt_qca() returning the wrong value on > success. It currently returns the value from hci_devcd_init() (0 on > success), but callers expect > 0 to indicate successful dump handling. > This causes hci_recv_frame() to free the skb while it's still queued for > dump processing, leading to the NULL pointer dereference when > hci_devcd_rx() tries to dequeue it. > > Fix this by: > > 1. Extracting dump packet detection into new is_dump_pkt_qca() function > 2. Making handle_dump_pkt_qca() return 0 on success and negative errno > on failure, consistent with other kernel interfaces > > This prevents premature skb freeing by ensuring proper handling of > dump packets. > > Fixes: 20981ce2d5a5 ("Bluetooth: btusb: Add WCN6855 devcoredump support") > Signed-off-by: En-Wei Wu <en-wei.wu@xxxxxxxxxxxxx> > --- > changes in v2: > - Fix typo in the title > - Re-flow a line in the commit message to fit 72 characters > - Add a blank line before btusb_recv_acl_qca() > > drivers/bluetooth/btusb.c | 76 ++++++++++++++++++++++++--------------- > 1 file changed, 48 insertions(+), 28 deletions(-) > > diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c > index 279fe6c115fa..741be218610e 100644 > --- a/drivers/bluetooth/btusb.c > +++ b/drivers/bluetooth/btusb.c > @@ -2930,22 +2930,16 @@ static void btusb_coredump_qca(struct hci_dev *hdev) > bt_dev_err(hdev, "%s: triggle crash failed (%d)", __func__, err); > } > > -/* > - * ==0: not a dump pkt. > - * < 0: fails to handle a dump pkt > - * > 0: otherwise. > - */ > +/* Return: 0 on success, negative errno on failure. */ > static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) > { > - int ret = 1; > + int ret = 0; > u8 pkt_type; > u8 *sk_ptr; > unsigned int sk_len; > u16 seqno; > u32 dump_size; > > - struct hci_event_hdr *event_hdr; > - struct hci_acl_hdr *acl_hdr; > struct qca_dump_hdr *dump_hdr; > struct btusb_data *btdata = hci_get_drvdata(hdev); > struct usb_device *udev = btdata->udev; > @@ -2955,30 +2949,14 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) > sk_len = skb->len; > > if (pkt_type == HCI_ACLDATA_PKT) { > - acl_hdr = hci_acl_hdr(skb); > - if (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE) > - return 0; > sk_ptr += HCI_ACL_HDR_SIZE; > sk_len -= HCI_ACL_HDR_SIZE; I know this is in the original code, but this is totally unsafe, we can't go accessing the skb->data pointer without validating it has this size, not to mention it is a little odd, to say the least, to encode a dump event into a an ACL data packet, but then again it was in the original code so I assume the firmware really does weird things like this. Anyway if we know for sure this is a dump packet it shall be possible to use the likes of skb_pull_data and stop doing unsafe access like this. > - event_hdr = (struct hci_event_hdr *)sk_ptr; > - } else { > - event_hdr = hci_event_hdr(skb); > } > > - if ((event_hdr->evt != HCI_VENDOR_PKT) > - || (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) > - return 0; > - > sk_ptr += HCI_EVENT_HDR_SIZE; > sk_len -= HCI_EVENT_HDR_SIZE; Ditto, just use skb_pull_data. > dump_hdr = (struct qca_dump_hdr *)sk_ptr; > - if ((sk_len < offsetof(struct qca_dump_hdr, data)) > - || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) > - || (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) > - return 0; > - > - /*it is dump pkt now*/ > seqno = le16_to_cpu(dump_hdr->seqno); > if (seqno == 0) { > set_bit(BTUSB_HW_SSR_ACTIVE, &btdata->flags); > @@ -3052,17 +3030,59 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) > return ret; > } > > +/* Return: true if packet is a dump packet, false otherwise. */ > +static bool is_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) > +{ > + u8 pkt_type; > + u8 *sk_ptr; > + unsigned int sk_len; > + > + struct hci_event_hdr *event_hdr; > + struct hci_acl_hdr *acl_hdr; > + struct qca_dump_hdr *dump_hdr; > + > + pkt_type = hci_skb_pkt_type(skb); > + sk_ptr = skb->data; > + sk_len = skb->len; > + > + if (pkt_type == HCI_ACLDATA_PKT) { > + acl_hdr = hci_acl_hdr(skb); > + if (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE) > + return false; > + sk_ptr += HCI_ACL_HDR_SIZE; > + sk_len -= HCI_ACL_HDR_SIZE; > + event_hdr = (struct hci_event_hdr *)sk_ptr; At this point we can actually use skb_pull_data as well since I don't think the stack is supposed to process data packets with QCA_MEMDUMP_ACL_HANDLE as handle. > + } else { > + event_hdr = hci_event_hdr(skb); > + } > + > + if ((event_hdr->evt != HCI_VENDOR_PKT) > + || (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) > + return false; > + > + sk_ptr += HCI_EVENT_HDR_SIZE; > + sk_len -= HCI_EVENT_HDR_SIZE; Unsafe access, sk_len might loop around as well. > + dump_hdr = (struct qca_dump_hdr *)sk_ptr; > + if ((sk_len < offsetof(struct qca_dump_hdr, data)) > + || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) > + || (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) > + return false; > + > + return true; This should probably be places in a qca specific portion, also this is not very efficient, so I wonder if we should have some means for driver to register handles for its vendor events like this, so driver don't have to go pick the packet appart to detect that it is not really meant for the Bluetooth stack to process. > +} > + > static int btusb_recv_acl_qca(struct hci_dev *hdev, struct sk_buff *skb) > { > - if (handle_dump_pkt_qca(hdev, skb)) > - return 0; > + if (is_dump_pkt_qca(hdev, skb)) > + return handle_dump_pkt_qca(hdev, skb); This should be something like btqca_recv_acl, etc. > return hci_recv_frame(hdev, skb); > } > > static int btusb_recv_evt_qca(struct hci_dev *hdev, struct sk_buff *skb) > { > - if (handle_dump_pkt_qca(hdev, skb)) > - return 0; > + if (is_dump_pkt_qca(hdev, skb)) > + return handle_dump_pkt_qca(hdev, skb); Ditto, also since there is a clear difference between event vs ACL packet I don't think it should be calling the same helper function to detect if it is a dump packet or not. > return hci_recv_frame(hdev, skb); > } > > -- > 2.43.0 > -- Luiz Augusto von Dentz