Hello, We detected a spurious crash of `bluetoothd` in our embedded system, acting as a GATT server. The system is running the latest BlueZ release (5.76). I traced the problem to a use-after-free (UAF) error, sometimes triggered when a device disconnects while a characteristic read or write is still being processed. Below are relevant debug and Valgrind traces. ---- bluetoothd[167]: src/shared/gatt-server.c:handle_read_req() Read Req - handle: 0x0014 bluetoothd[167]: src/shared/gatt-server.c:read_complete_cb() Read Complete: err 0 bluetoothd[167]: src/shared/att.c:can_read_data() (chan 0x53bd198) ATT PDU received: 0x0a bluetoothd[167]: src/shared/gatt-server.c:handle_read_req() Read Req - handle: 0x002e bluetoothd[167]: src/shared/mgmt.c:can_read_data() [0x0000] event 0x000c bluetoothd[167]: src/adapter.c:dev_disconnected() Device 48:A4:72:79:82:D5 disconnected, reason 3 bluetoothd[167]: src/adapter.c:adapter_remove_connection() bluetoothd[167]: plugins/policy.c:disconnect_cb() reason 3 bluetoothd[167]: src/adapter.c:bonding_attempt_complete() hci0 bdaddr 48:A4:72:79:82:D5 type 1 status 0xe bluetoothd[167]: src/device.c:device_bonding_complete() bonding (nil) status 0x0e bluetoothd[167]: src/device.c:btd_device_set_temporary() temporary 1 bluetoothd[167]: src/device.c:device_bonding_failed() status 14 bluetoothd[167]: src/adapter.c:resume_discovery() bluetoothd[167]: src/shared/att.c:disconnect_cb() Channel 0x53bd198 disconnected: Connection reset by peer bluetoothd[167]: src/device.c:att_disconnected_cb() bluetoothd[167]: src/device.c:att_disconnected_cb() Connection reset by peer (104) bluetoothd[167]: src/service.c:change_state() 0x53faa80: device 48:A4:72:79:82:D5 profile gap-profile state changed: connected -> disconnecting (0) bluetoothd[167]: src/service.c:change_state() 0x53faa80: device 48:A4:72:79:82:D5 profile gap-profile state changed: disconnecting -> disconnected (0) bluetoothd[167]: src/service.c:change_state() 0x54017a8: device 48:A4:72:79:82:D5 profile deviceinfo state changed: connected -> disconnecting (0) bluetoothd[167]: src/service.c:change_state() 0x54017a8: device 48:A4:72:79:82:D5 profile deviceinfo state changed: disconnecting -> disconnected (0) bluetoothd[167]: src/gatt-client.c:btd_gatt_client_disconnected() Device disconnected. Cleaning up. bluetoothd[167]: src/device.c:att_disconnected_cb() Automatic connection disabled bluetoothd[167]: src/gatt-database.c:btd_gatt_database_att_disconnected() bluetoothd[167]: src/gatt-database.c:att_disconnected() bluetoothd[167]: attrib/gattrib.c:g_attrib_unref() 0x53bd140: g_attrib_unref=0 bluetoothd[167]: src/gatt-database.c:read_reply_cb() Pending read was canceled when object got removed bluetoothd[167]: src/shared/gatt-server.c:read_complete_cb() Read Complete: err -110 ==167== Invalid read of size 4 ==167== at 0x8277C: bt_att_chan_send_error_rsp (in /usr/libexec/bluetooth/bluetoothd) ==167== Address 0x53bd198 is 0 bytes inside a block of size 44 free'd ==167== at 0x482F350: free (vg_replace_malloc.c:538) ==167== by 0x8320F: disconnect_cb (in /usr/libexec/bluetooth/bluetoothd) ==167== Block was alloc'd at ==167== at 0x482DE08: malloc (vg_replace_malloc.c:307) ==167== by 0x7BDD3: util_malloc (in /usr/libexec/bluetooth/bluetoothd) ==167== [cropped, valgrind complains a lot after this] ---- These logs illustrate that the problem occurs soon after `read_complete_cb` is executed, in `bt_att_chan_send_error_rsp`. src/shared/gatt-server.c 904 static void read_complete_cb(struct gatt_db_attribute *attr, int err, 905 const uint8_t *value, size_t len, 906 void *user_data) 907 { 908 struct async_read_op *op = user_data; 909 struct bt_gatt_server *server = op->server; 910 uint8_t rsp_opcode; 911 uint16_t mtu; 912 uint16_t handle; 913 914 DBG(server, "Read Complete: err %d", err); 915 916 mtu = bt_att_get_mtu(server->att); 917 handle = gatt_db_attribute_get_handle(attr); 918 919 if (err) { 920 bt_att_chan_send_error_rsp(op->chan, op->opcode, handle, err); 921 async_read_op_destroy(op); 922 return; 923 } However, at this point, `op->chan` has already been freed by a `disconnect_cb`. I'm not familiar with the BlueZ code base and I don't know the idiomatic way to fix this issue. I'd say `bt_att_chan_send_error_rsp` should only be called if `op->chan` is still allocated. The following patch attempts to implement a solution. It's probably not the right way to do it, but this seemingly fixes the issue on our side. ----- diff --git a/src/shared/att.c b/src/shared/att.c index 485ef07..baef0cc 100644 --- a/src/shared/att.c +++ b/src/shared/att.c @@ -2068,3 +2068,11 @@ done: return true; } + +struct queue *bt_att_get_chans(struct bt_att *att) +{ + if (!att) + return NULL; + + return att->chans; +} \ No newline at end of file diff --git a/src/shared/att.h b/src/shared/att.h index 6fd7863..98f91e1 100644 --- a/src/shared/att.h +++ b/src/shared/att.h @@ -111,3 +111,5 @@ bool bt_att_set_remote_key(struct bt_att *att, uint8_t sign_key[16], bt_att_counter_func_t func, void *user_data); bool bt_att_has_crypto(struct bt_att *att); bool bt_att_set_retry(struct bt_att *att, unsigned int id, bool retry); + +struct queue *bt_att_get_chans(struct bt_att *att); \ No newline at end of file diff --git a/src/shared/gatt-server.c b/src/shared/gatt-server.c index 0e399ce..ca9af59 100644 --- a/src/shared/gatt-server.c +++ b/src/shared/gatt-server.c @@ -790,7 +790,21 @@ static void write_complete_cb(struct gatt_db_attribute *attr, int err, handle = gatt_db_attribute_get_handle(attr); if (err) - bt_att_chan_send_error_rsp(op->chan, op->opcode, handle, err); + { + // Only call bt_att_chan_send_error_rsp if the channel + // is still valid + struct bt_att *att = bt_gatt_server_get_att(server); + struct queue *chans = bt_att_get_chans(att); + const struct queue_entry *entry; + for (entry = queue_get_entries(chans); entry; entry = entry->next) + { + if (entry->data == op->chan) + { + bt_att_chan_send_error_rsp(op->chan, op->opcode, handle, err); + break; + } + } + } else bt_att_chan_send_rsp(op->chan, BT_ATT_OP_WRITE_RSP, NULL, 0); @@ -917,7 +931,19 @@ static void read_complete_cb(struct gatt_db_attribute *attr, int err, handle = gatt_db_attribute_get_handle(attr); if (err) { - bt_att_chan_send_error_rsp(op->chan, op->opcode, handle, err); + // Only call bt_att_chan_send_error_rsp if the channel + // is still valid + struct bt_att *att = bt_gatt_server_get_att(server); + struct queue *chans = bt_att_get_chans(att); + const struct queue_entry *entry; + for (entry = queue_get_entries(chans); entry; entry = entry->next) + { + if (entry->data == op->chan) + { + bt_att_chan_send_error_rsp(op->chan, op->opcode, handle, err); + break; + } + } async_read_op_destroy(op); return; } ----- Regards, Kévin