Hello linux-usb, I'd like to report another USB bug that appeared during surprise hotplug device removal. With slub_debug=FZPU set, I would occasionally see the following writes to a poison pattern: ============================================================================= BUG kmalloc-2048 (Tainted: PF O-------------- ): Poison overwritten ----------------------------------------------------------------------------- INFO: 0xffff880063bfe700-0xffff880063bfe706. First byte 0xff instead of 0x6b INFO: Allocated in usb_alloc_dev+0x35/0x310 age=435229 cpu=0 pid=3807 __slab_alloc+0x3d1/0x482 kmem_cache_alloc_trace+0x19f/0x1f0 usb_alloc_dev+0x35/0x310 usb_add_hcd+0x141/0x8b0 usb_hcd_pci_probe+0x413/0x540 local_pci_probe+0x45/0xa0 pci_device_probe+0x139/0x150 driver_probe_device+0x87/0x390 __device_attach+0x3b/0x40 bus_for_each_drv+0x6b/0xb0 device_attach+0x88/0xa0 pci_bus_add_device+0x35/0x60 pci_bus_add_devices+0x39/0xa0 pci_bus_add_devices+0x87/0xa0 pci_bus_add_devices+0x87/0xa0 ftmod_ioboard_event+0x3dc/0x400 [ftmod] INFO: Freed in usb_release_dev+0x60/0x70 age=648 cpu=0 pid=3326 __slab_free+0x4c/0x277 kfree+0xfd/0x130 usb_release_dev+0x60/0x70 device_release+0x32/0xa0 kobject_release+0x7e/0x1b0 kobject_put+0x28/0x60 put_device+0x17/0x20 usb_put_dev+0x1a/0x20 usb_remove_hcd+0x146/0x1d0 usb_hcd_pci_remove+0x12f/0x150 pci_device_remove+0x3b/0xb0 __device_release_driver+0x7f/0xf0 device_release_driver+0x23/0x30 bus_remove_device+0x108/0x180 device_del+0x135/0x1d0 pci_stop_bus_device+0x94/0xa0 INFO: Slab 0xffffea00018efe00 objects=13 used=13 fp=0x (null) flags=0x1fffff00004080 INFO: Object 0xffff880063bfe618 @offset=26136 fp=0x (null) ... snip ... Object ffff880063bfe6f8: 6b 6b 6b 6b 6b 6b 6b 6b ff ff ff ff 6d 6b 6d 6b kkkkkkkk....mkmk ... snip ... CPU: 0 PID: 2757 Comm: gnome-shell Tainted: PF B O-------------- 3.10.0-123.6.3.el7.bigphysarea.x86_64 #1 Hardware name: Stratus ftServer 2700/G7LAY, BIOS BIOS Version 6.3:57 12/25/2013 ffff88017ac04d80 00000000bc8f89b9 ffff88016487d880 ffffffff815e295b ffff88016487d8c0 ffffffff8119269d 0000000000000008 ffff880000000001 ffff880063bfe707 ffff88017ac04d80 000000000000006b ffff880063bfe618 Call Trace: [<ffffffff815e295b>] dump_stack+0x19/0x1b [<ffffffff8119269d>] print_trailer+0x14d/0x200 [<ffffffff8119288f>] check_bytes_and_report+0xcf/0x110 [<ffffffff81193607>] check_object+0x1d7/0x250 [<ffffffff814c10dd>] ? __alloc_skb+0x8d/0x2d0 [<ffffffff815dee01>] alloc_debug_processing+0x76/0x118 [<ffffffff815df916>] __slab_alloc+0x3d1/0x482 [<ffffffff814c10dd>] ? __alloc_skb+0x8d/0x2d0 [<ffffffff814c10ad>] ? __alloc_skb+0x5d/0x2d0 [<ffffffff81197ea1>] __kmalloc_node_track_caller+0xb1/0x270 [<ffffffff814c10dd>] ? __alloc_skb+0x8d/0x2d0 [<ffffffff814c0911>] __kmalloc_reserve.isra.25+0x31/0x90 [<ffffffff814c10dd>] __alloc_skb+0x8d/0x2d0 [<ffffffff814bb768>] sock_alloc_send_pskb+0x1e8/0x370 [<ffffffff81090b5f>] ? __wake_up_sync_key+0x4f/0x60 [<ffffffff814bb905>] sock_alloc_send_skb+0x15/0x20 [<ffffffff81573b4f>] unix_stream_sendmsg+0x27f/0x420 [<ffffffff814b70f7>] sock_aio_write+0x157/0x180 [<ffffffff811af9b9>] do_sync_readv_writev+0x79/0xd0 [<ffffffff811b0eae>] do_readv_writev+0xce/0x260 [<ffffffff811b10d5>] vfs_writev+0x35/0x60 [<ffffffff811b122c>] SyS_writev+0x5c/0xd0 [<ffffffff815f3059>] system_call_fastpath+0x16/0x1b FIX kmalloc-2048: Restoring 0xffff880063bfe700-0xffff880063bfe706=0x6b FIX kmalloc-2048: Marking all objects used crash> p 0xffff880063bfe700-0xffff880063bfe618 $1 = 0xe8 crash> p 0xe8-0x88 $2 = 0x60 struct usb_device { [0x0] int devnum; [0x4] char devpath[16]; [0x14] u32 route; [0x18] enum usb_device_state state; [0x1c] enum usb_device_speed speed; [0x20] struct usb_tt *tt; [0x28] int ttport; [0x2c] unsigned int toggle[2]; [0x38] struct usb_device *parent; [0x40] struct usb_bus *bus; [0x48] struct usb_host_endpoint ep0; [0x88] struct device dev; crash> struct -o device struct device { [0x0] struct device *parent; [0x8] struct device_private *p; [0x10] struct kobject kobj; [0x50] const char *init_name; [0x58] const struct device_type *type; > [0x60] struct mutex mutex; crash> struct -o mutex struct mutex { [0x0] atomic_t count; << ff ff ff ff [0x4] spinlock_t wait_lock; << 6d 6b 6d 6b [0x8] struct list_head wait_list; [0x18] struct task_struct *owner; [0x20] struct optimistic_spin_queue *osq; } SIZE: 0x28 Setting up mutex debugging didn't help track down the use-after-free, so I patched usb_alloc_dev and usb_release_dev to use __get_free_page and set_memory_ro to set a read-only page trap, rather than releasing and poisioning the area. With that in place, I restarted the tests and hit: BUG: unable to handle kernel paging request at ffff88005e5250ec IP: [<ffffffff815e9ffe>] _raw_spin_lock+0xe/0x50 PGD 1e1f067 PUD 17ffff067 PMD 168b63063 PTE 800000005e525161 Oops: 0003 [#1] SMP Modules linked in: ip6table_filter ip6_tables iptable_filter ip_tables ebtable_nat ebtables team_mode_activebackup ccmod(POF) team sg ftmod(OF) coretemp crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr dm_mirror dm_region_hash dm_log ipmi_devintf mperf nfsd auth_rpcgss dm_multipath nfs_acl lockd dm_mod uinput sunrpc ext4 mbcache jbd2 raid1 sr_mod cdrom usb_storage mpt2sas(OF) raid_class sd_mod(OF) igb(OF) ptp crc_t10dif crct10dif_common pps_core scsi_hbas(OF) matroxfb(OF) dca i2c_algo_bit scsi_transport_sas videosw(OF) i2c_core ipmi_msghandler CPU: 1 PID: 93 Comm: khubd Tainted: PF O-------------- 3.10.0-123.6.3.el7.bigphysarea_usb_debug5.x86_64 #1 Hardware name: Stratus ftServer 2700/G7LAY, BIOS BIOS Version 6.3:57 12/25/2013 task: ffff880178da6580 ti: ffff880178dbe000 task.ti: ffff880178dbe000 RIP: 0010:[<ffffffff815e9ffe>] [<ffffffff815e9ffe>] _raw_spin_lock+0xe/0x50 RSP: 0018:ffff880178dbfd60 EFLAGS: 00010286 RAX: 0000000000020000 RBX: ffff88005e5250e8 RCX: 0000000000000000 RDX: ffff880178dbffd8 RSI: ffff880178da6580 RDI: ffff88005e5250ec RBP: ffff880178dbfd60 R08: ffff880178dbe000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: ffff88005e5250ec R13: ffff880178da6580 R14: 00000000ffffffff R15: ffff88005e5250f0 FS: 0000000000000000(0000) GS:ffff88017b220000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff88005e5250ec CR3: 00000001688a1000 CR4: 00000000000407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffff880178dbfdc0 ffffffff815e704d ffff88005e5250f0 ffff88005e5250f0 ffff880178da6580 00000000088ce504 ffff880178dbfda8 ffff88005e5250e8 ffff88005e5250e8 ffff880174d7d9c8 0000000000000000 ffff880174d78af0 Call Trace: [<ffffffff815e704d>] __mutex_lock_slowpath+0xcd/0x1c0 [<ffffffff815e64af>] mutex_lock+0x1f/0x2f [<ffffffff813f5886>] hub_events+0xb6/0xa10 [<ffffffff813f6225>] hub_thread+0x45/0x1a0 [<ffffffff81086ac0>] ? wake_up_bit+0x30/0x30 [<ffffffff813f61e0>] ? hub_events+0xa10/0xa10 [<ffffffff81085aff>] kthread+0xcf/0xe0 [<ffffffff81085a30>] ? kthread_create_on_node+0x140/0x140 [<ffffffff815f2fac>] ret_from_fork+0x7c/0xb0 [<ffffffff81085a30>] ? kthread_create_on_node+0x140/0x140 drivers/usb/core/hub.c: 4667 <hub_events+0xb6>: testb $0x4,0x90(%r15) <hub_events+0xbe>: jne 0xffffffff813f58b2 <hub_events+0xe2> 4655 hdev = hub->hdev; 4656 hub_dev = hub->intfdev; 4657 intf = to_usb_interface(hub_dev); 4658 dev_dbg(hub_dev, "state %d ports %d chg %04x evt %04x\n", 4659 hdev->state, hdev->maxchild, 4660 /* NOTE: expects max 15 ports... */ 4661 (u16) hub->change_bits[0], 4662 (u16) hub->event_bits[0]); 4663 4664 /* Lock the device, then check to see if we were 4665 * disconnected while waiting for the lock to succeed. */ 4666 usb_lock_device(hdev); crash> struct usb_hub ffff880174d78a90 | grep -e hdev -e disconnected hdev = 0xffff88005e525000, disconnected = 0x1, Yep, that was our page. Notice that the hub was marked disconnected. So we know that hub_disconnect ran: 1604 static void hub_disconnect(struct usb_interface *intf) ... 1610 /* Take the hub off the event list and don't let it be added again */ 1611 spin_lock_irq(&hub_event_lock); 1612 if (!list_empty(&hub->event_list)) { 1613 list_del_init(&hub->event_list); 1614 usb_autopm_put_interface_no_suspend(intf); 1615 } 1616 hub->disconnected = 1; 1617 spin_unlock_irq(&hub_event_lock); But somehow hub_events also got a hold on the same device: 4619 static void hub_events(void) ... 4641 /* Grab the first entry at the beginning of the list */ 4642 spin_lock_irq(&hub_event_lock); 4643 if (list_empty(&hub_event_list)) { 4644 spin_unlock_irq(&hub_event_lock); 4645 break; 4646 } 4647 4648 tmp = hub_event_list.next; 4649 list_del_init(tmp); 4650 4651 hub = list_entry(tmp, struct usb_hub, event_list); 4652 kref_get(&hub->kref); 4653 spin_unlock_irq(&hub_event_lock); hub_events does later go on to check for hub->disconnected, but that's outside the hub_event_lock and *after* the call to usb_lock_device. In my traces, that's too late as its already been freed and poisoned. There's probably a better way to coordinate these two functions, but the following change (on top of the one in my other mail) has run our device removal tests without incident thus far. Thanks, -- Joe -->8-- -->8-- >From e5b3b8c6ffd282312dc8c9bec0ce261f56ae9731 Mon Sep 17 00:00:00 2001 From: Joe Lawrence <joe.lawrence@xxxxxxxxxxx> Date: Tue, 9 Sep 2014 09:39:38 -0400 Subject: [PATCH] usb: hub: take hub->hdev reference when adding to eventlist During surprise device hotplug removal tests, it was observed that hub_events may try to call usb_lock_device on a device that has already been freed. Protect the usb_device by taking out a reference before the hub is added to the eventlist, releasing the reference after hub_events is finished using it. Signed-off-by: Joe Lawrence <joe.lawrence@xxxxxxxxxxx> --- drivers/usb/core/hub.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index fe74c9b..6f83cc7 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -581,6 +581,7 @@ static void kick_khubd(struct usb_hub *hub) spin_lock_irqsave(&hub_event_lock, flags); if (!hub->disconnected && list_empty(&hub->event_list)) { + usb_get_dev(hub->hdev); list_add_tail(&hub->event_list, &hub_event_list); /* Suppress autosuspend until khubd runs */ @@ -5139,6 +5140,7 @@ static void hub_events(void) usb_autopm_put_interface(intf); loop_disconnected: usb_unlock_device(hdev); + usb_put_dev(hdev); kref_put(&hub->kref, hub_release); } /* end while (1) */ -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-usb" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html