Hi, I'm using Intel's Wireless-N 7260. I'm testing bluetooth part of it and sometimes kernel crashes. The kernel in question is 3.8 with wireless part being 3.10 with additional changes to sco.c. I have a HFP connection to the phone. While in a call the audio is routed from the phone to the board. I then disconnect the SCO connection to have audio back on the phone. That's when it crashes. I've gathered some logs of when it work and when it doesn't when it works: [ 302.808054] hci_rx_work:3544: hci0 Event packet [ 302.813344] hci_disconn_complete_evt:1854: hci0 status 0x00 [ 302.819638] sco_disconn_cfm:1090: hcon f251f800 reason 19 [ 302.826012] sco_conn_del:125: hcon f251f800 conn f2128400, err 104, fun_p: fa283deb [ 302.834767] sco_sock_clear_timer:73: sock f25c7000 state 1 [ 302.840916] sco_chan_del:971: sk f25c7000, conn f2128400, err 104 [ 302.847736] sco_chan_del:979: sk f25c7000, conn f2128400, err 104 [ 302.854564] sco_chan_del:988: sk f25c7000, set ZAPPED [ 302.854655] sco_sock_shutdown:910: sock f1c69080, sk f25c7000, func_p: c12a313e [ 302.868424] sock_set_flag:676: sk: f25c7000, sock: f1c69080, state: 9, flag: 8 [ 302.876521] sco_conn_del:133: before bh_unlock_sock, sk: f25c7000, hcon f251f800 conn f2128400, err 104 [ 302.887078] sco_sock_clear_timer:73: sock f25c7000 state 9 [ 302.887177] sco_conn_del:135: calling sco_sock_kill in sco_conn_del with: hcon f251f800 conn f2128400, err 104 [ 302.887181] sco_sock_kill:340: in kill: sk f25c7000 sock: f1c69080 state 9, is zapped: 1, caller: fa283deb [ 302.887185] hci_conn_del:461: hci0 hcon f251f800 handle 265 [ 302.887189] hci_chan_list_flush:1027: hcon f251f800 [ 302.926877] __sco_sock_close:355: sk f25c7000 state 9 socket f1c69080 [ 302.934128] sock_set_flag:676: sk: f25c7000, sock: f1c69080, state: 9, flag: 8 [ 302.942413] sco_sock_release:934: sock f1c69080, sk f25c7000 [ 303.052559] sco_sock_close:388: sk f25c7000, fun_p: fa2835c8 [ 303.058930] sco_sock_clear_timer:73: sock f25c7000 state 9 [ 303.065133] __sco_sock_close:355: sk f25c7000 state 9 socket f1c69080 [ 303.072394] sock_set_flag:676: sk: f25c7000, sock: f1c69080, state: 9, flag: 8 [ 303.080679] sco_sock_kill:340: in kill: sk f25c7000 sock: f1c69080 state 9, is zapped: 1, caller: fa2835c8 [ 303.091522] sock_set_flag:676: sk: f25c7000, sock: f1c69080, state: 9, flag: 0 [ 303.099668] sco_sock_kill:340: in kill: sk f25c7000 sock: (null) state 9, is zapped: 1, caller: c12a21b0 [ 303.110534] sco_sock_kill:345: sk f25c7000 state 9 [ 303.115987] bt_sock_unlink:146: sk f25c7000 state 9 [ 303.121493] sock_set_flag:676: sk: f25c7000, sock: (null), state: 9, flag: 0 [ 303.129632] sco_sock_destruct:313: sk f25c7000 when it fails: [ 404.505698] hci_rx_work:3544: hci0 Event packet [ 404.510869] hci_event_packet:3870: hci0 event 0x1b [ 404.516549] hci_rx_work:3544: hci0 Event packet [ 404.521698] hci_disconn_complete_evt:1854: hci0 status 0x00 [ 404.527983] sco_disconn_cfm:1090: hcon f325d000 reason 19 [ 404.534107] sco_conn_del:125: hcon f325d000 conn f18b5460, err 104, fun_p: fa283deb [ 404.543010] sco_sock_clear_timer:73: sock f26e8000 state 1 [ 404.549177] sco_chan_del:971: sk f26e8000, conn f18b5460, err 104 [ 404.556002] sco_chan_del:979: sk f26e8000, conn f18b5460, err 104 [ 404.562838] sco_chan_del:988: sk f26e8000, set ZAPPED [ 404.568496] sock_set_flag:676: sk: f26e8000, sock: f46dd240, state: 9, flag: 8 [ 404.576594] sco_conn_del:133: before bh_unlock_sock, sk: f26e8000, hcon f325d000 conn f18b5460, err 104 [ 404.587123] sco_conn_del:135: calling sco_sock_kill in sco_conn_del with: hcon f325d000 conn f18b5460, err 104 [ 404.587270] sco_sock_shutdown:910: sock f46dd240, sk f26e8000, func_p: c12a313e [ 404.587273] sco_sock_clear_timer:73: sock f26e8000 state 9 [ 404.587276] __sco_sock_close:355: sk f26e8000 state 9 socket f46dd240 [ 404.587279] sock_set_flag:676: sk: f26e8000, sock: f46dd240, state: 9, flag: 8 [ 404.587326] sco_sock_release:934: sock f46dd240, sk f26e8000 [ 404.587328] sco_sock_close:388: sk f26e8000, fun_p: fa2835c8 [ 404.587331] sco_sock_clear_timer:73: sock f26e8000 state 9 [ 404.587333] __sco_sock_close:355: sk f26e8000 state 9 socket f46dd240 [ 404.587336] sock_set_flag:676: sk: f26e8000, sock: f46dd240, state: 9, flag: 8 [ 404.587339] sco_sock_kill:340: in kill: sk f26e8000 sock: f46dd240 state 9, is zapped: 1, caller: fa2835c8 [ 404.587342] sock_set_flag:676: sk: f26e8000, sock: f46dd240, state: 9, flag: 0 [ 404.587346] sco_sock_kill:340: in kill: sk f26e8000 sock: (null) state 9, is zapped: 1, caller: c12a21b0 [ 404.587348] sco_sock_kill:345: sk f26e8000 state 9 [ 404.587350] bt_sock_unlink:146: sk f26e8000 state 9 [ 404.587353] sock_set_flag:676: sk: f26e8000, sock: (null), state: 9, flag: 0 [ 404.587355] sco_sock_destruct:313: sk f26e8000 [ 405.226130] sco_sock_kill:340: in kill: sk f26e8000 sock: (null) state 117, is zapped: 1, caller: fa283deb [ 405.341112] sco_sock_kill:345: sk f26e8000 state 117 [ 405.346835] bt_sock_unlink:146: sk f26e8000 state 117 [ 405.352654] BUG: unable to handle kernel paging request at 7541203a [ 405.359656] IP: [<fa261bb9>] bt_sock_unlink+0x43/0x79 [bluetooth] [ 405.366483] *pdpt = 000000003327e001 *pde = 0000000000000000 [ 405.372907] Oops: 0002 [#1] PREEMPT SMP [ 405.377303] Modules linked in: evdev libcomposite usb2380 udc_core rfcomm bnep btusb bluetooth ata_generic cdc_acm uinput hid_multitouch usbhid hid spidev spi_pxa2xx_platform coretemp xhci_hcd ata_piix[ 405.396841] sock_set_flag:676: TCP: sk: f259fa80, sock: f1d1e800, state: 1, flag: 1 spi_pxa2xx_pci libata igb_avb(O) hwmon ptp pps_core adv atomisp videobuf2_dma_contig videobuf2_memops videobuf2_core i2c_dev i2c_designware_pci i2c_designware_core [ 405.473529] Pid: 7, comm: kworker/u:0H Tainted: G W IO 3.8.0-8.0-adaptation-byt-ivi-brd #1 [ 405.483634] EIP: 0060:[<fa261bb9>] EFLAGS: 00010206 CPU: 2 [ 405.489782] EIP is at bt_sock_unlink+0x43/0x79 [bluetooth] Function sco_sock_kill and few others had more logs added to it to trace theirs caller and execution progress (patch included below). In all cases it's called 3 times from addresses: fa283deb, c12a21b0 and fa2835c8 (luckily these are consistent across reboots) I'm not sure what is c12a21b0 because I can't find it in kallsyms: c12a219c T sock_release c12a21fe T __sock_create sco_disconn_cfm is calling sco_chan_del and this in the end calls sco_sock_kill. sco_chan_del is interrupted by sco_sock_shutdown. It looks like that if this shutdown is called while still holding a lock (before bh_unlock_sock(sk) ) all if fine, but if it arrives after unlock the sco_chan_del contexts restarts after c12a21b0 finishes (this destroys the socket) and there is a kernel crash as sco_sock_kill holds pointer to a socket that is already gone. Has anybody seen similar issues or have any other suggestions? This seems as a drastic thing to do but removing "sco_sock_kill(sk)" from sco_conn_del stops the kernel from crashing. A patch I've used to print extra debugs: >From 0b272acaebec42d7c28271be104631d8ed5da4ba Mon Sep 17 00:00:00 2001 From: Kuba Pawlak <kubax.t.pawlak@xxxxxxxxx> Date: Wed, 26 Feb 2014 18:48:04 +0100 Subject: [PATCH] extra debugs in SCO management --- include/net/sock.h | 1 + net/bluetooth/af_bluetooth.c | 1 + net/bluetooth/sco.c | 20 +++++++++++++++++--- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 182ca99405adfb30e7b72181847b4a5ffd5d69ca..4820ae4957eb68ca146418f9d4c9cea0b2c4142e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -673,6 +673,7 @@ static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) static inline void sock_set_flag(struct sock *sk, enum sock_flags flag) { + pr_debug("sk: %p, sock: %p, state: %d, flag: %d\n", sk, sk->sk_socket, sk->sk_state, flag); __set_bit(flag, &sk->sk_flags); } diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 5355df63d39b673d246d131564a1e17dc116c27d..4da91dfdea1893d8a2fbecf0a272ebc17852d519 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -152,6 +152,7 @@ EXPORT_SYMBOL(bt_sock_link); void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk) { + BT_DBG("sk %p state %d", sk, sk->sk_state); write_lock(&l->lock); sk_del_node_init(sk); write_unlock(&l->lock); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 57f250c20e399851ca6998d9813dc60e3ffa3455..d4d22f61899dd8c6184f9d398fb66db9a2981757 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -51,7 +51,8 @@ static void sco_sock_timeout(unsigned long arg) { struct sock *sk = (struct sock *) arg; - BT_DBG("sock %p state %d", sk, sk->sk_state); + BT_DBG("sock %p state %d, fun_p: %p", sk, sk->sk_state, + __builtin_return_address(0)); bh_lock_sock(sk); sk->sk_err = ETIMEDOUT; @@ -122,7 +123,8 @@ static int sco_conn_del(struct hci_conn *hcon, int err) if (!conn) return 0; - BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); + BT_DBG("hcon %p conn %p, err %d, fun_p: %p", hcon, conn, err, + __builtin_return_address(0)); /* Kill socket */ sk = sco_chan_get(conn); @@ -130,7 +132,11 @@ static int sco_conn_del(struct hci_conn *hcon, int err) bh_lock_sock(sk); sco_sock_clear_timer(sk); sco_chan_del(sk, err); + BT_DBG("before bh_unlock_sock, sk: %p, hcon %p conn %p, err %d", + sk, hcon, conn, err); bh_unlock_sock(sk); + BT_DBG("calling sco_sock_kill in sco_conn_del with: hcon %p conn %p, err %d", + hcon, conn, err); sco_sock_kill(sk); } @@ -330,6 +336,10 @@ static void sco_sock_cleanup_listen(struct sock *parent) */ static void sco_sock_kill(struct sock *sk) { + BT_DBG("in kill: sk %p sock: %p state %d, is zapped: %d, caller: %p, refcnt: %d", + sk, sk->sk_socket, sk->sk_state, sock_flag(sk, SOCK_ZAPPED), + __builtin_return_address(1), atomic_read(&sk->sk_refcnt)); + if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) return; @@ -375,6 +385,8 @@ static void __sco_sock_close(struct sock *sk) /* Must be called on unlocked socket. */ static void sco_sock_close(struct sock *sk) { + BT_DBG("sk %p, fun_p: %p", sk, __builtin_return_address(0)); + sco_sock_clear_timer(sk); lock_sock(sk); __sco_sock_close(sk); @@ -837,7 +849,7 @@ static int sco_sock_release(struct socket *sock) struct sock *sk = sock->sk; int err = 0; - BT_DBG("sock %p, sk %p", sock, sk); + BT_DBG("sock %p, sk %p, func_p: %p", sock, sk, __builtin_return_address(0)); if (!sk) return 0; @@ -882,6 +894,7 @@ static void sco_chan_del(struct sock *sk, int err) sco_pi(sk)->conn = NULL; sco_conn_unlock(conn); + BT_DBG("sk %p, conn %p, err %d", sk, conn, err); if (conn->hcon) hci_conn_put(conn->hcon); } @@ -890,6 +903,7 @@ static void sco_chan_del(struct sock *sk, int err) sk->sk_err = err; sk->sk_state_change(sk); + BT_DBG("sk %p, set ZAPPED", sk); sock_set_flag(sk, SOCK_ZAPPED); } -- 1.7.11.7 Kuba Pawlak Intel GmbH Dornacher Strasse 1 85622 Feldkirchen/Muenchen, Deutschland Sitz der Gesellschaft: Feldkirchen bei Muenchen Geschaeftsfuehrer: Christian Lamprechter, Hannes Schwaderer, Douglas Lusk Registergericht: Muenchen HRB 47456 Ust.-IdNr./VAT Registration No.: DE129385895 Citibank Frankfurt a.M. (BLZ 502 109 00) 600119052 -- To unsubscribe from this list: send the line "unsubscribe linux-bluetooth" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html