A customer of ours noticed that after a bunch of hot removals of the EHCI PCI device, a panic occurs. This happened on a 2.6.32 RHEL-6 kernel, but I believe is still applicable upstream. The panic was further simplified to enabling SLUB debug poisoning and running the following command: while true; do find /proc/bus/usb -type f -exec cat {} >/dev/null \; ; done & This gets the machine to panic after 1 or 2 removals of the device. The backtrace looks like: crash> bt PID: 23674 TASK: ffff8801ab411540 CPU: 2 COMMAND: "sosreport" #0 [ffff8801b9073980] machine_kexec at ffffffff8103281b #1 [ffff8801b90739e0] crash_kexec at ffffffff810ba662 #2 [ffff8801b9073ab0] oops_end at ffffffff81501290 #3 [ffff8801b9073ae0] no_context at ffffffff81043bab #4 [ffff8801b9073b30] __bad_area_nosemaphore at ffffffff81043e35 #5 [ffff8801b9073b80] bad_area at ffffffff81043f5e #6 [ffff8801b9073bb0] __do_page_fault at ffffffff81044710 #7 [ffff8801b9073cd0] do_page_fault at ffffffff8150326e #8 [ffff8801b9073d00] page_fault at ffffffff81500625 [exception RIP: __list_add+0x26] RIP: ffffffff81282fb6 RSP: ffff8801b9073db8 RFLAGS: 00010046 RAX: 0000000000000292 RBX: ffff8801b9073de8 RCX: 0000000000000000 RDX: ffff8802782788f8 RSI: 0000000000000000 RDI: ffff8801b9073de8 RBP: ffff8801b9073dd8 R8: 0000000000000000 R9: 0000000000000001 R10: ffff880028401fc0 R11: 0000000000000000 R12: ffff8802782788f8 R13: 0000000000000000 R14: 7fffffffffffffff R15: ffff8801b9073e98 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #9 [ffff8801b9073de0] __down at ffffffff814ff58e #10 [ffff8801b9073e30] down at ffffffff81097ef1 #11 [ffff8801b9073e60] usb_device_read at ffffffff813b78ce #12 [ffff8801b9073ef0] vfs_read at ffffffff8117b8b5 #13 [ffff8801b9073f30] sys_read at ffffffff8117b9f1 #14 [ffff8801b9073f80] tracesys at ffffffff8100b308 (via system_call) RIP: 00000030f90da360 RSP: 00007fffef71dda8 RFLAGS: 00000246 RAX: ffffffffffffffda RBX: ffffffff8100b308 RCX: ffffffffffffffff RDX: 0000000000001000 RSI: 00007f1926568000 RDI: 0000000000000003 RBP: 0000000000000000 R8: 00000000084023c4 R9: 00007f1926530700 R10: 0000000000002028 R11: 0000000000000246 R12: 0000000008403b7e R13: 0000000000000846 R14: 00000000083159c0 R15: 00007fffef71de40 ORIG_RAX: 0000000000000000 CS: 0033 SS: 002b This is likely because the following thread was in the process of removing the device: PID: 3070 TASK: ffff8802735fd540 CPU: 1 COMMAND: "io_poll" #0 [ffff8802788a97b0] schedule at ffffffff814fd7e2 #1 [ffff8802788a9878] __mutex_lock_slowpath at ffffffff814fee8e #2 [ffff8802788a98e8] mutex_lock at ffffffff814fed2b #3 [ffff8802788a9908] usb_deregister_bus at ffffffff813a59ff #4 [ffff8802788a9938] usb_remove_hcd at ffffffff813a5b9d #5 [ffff8802788a9978] usb_hcd_pci_remove at ffffffff813b80b3 #6 [ffff8802788a9998] pci_device_remove at ffffffff812930b7 #7 [ffff8802788a99b8] __device_release_driver at ffffffff8135006f #8 [ffff8802788a99d8] device_release_driver at ffffffff813501dd #9 [ffff8802788a99f8] bus_remove_device at ffffffff8134f073 #10 [ffff8802788a9a28] device_del at ffffffff8134ccbd #11 [ffff8802788a9a58] device_unregister at ffffffff8134cd92 #12 [ffff8802788a9a78] pci_stop_bus_device at ffffffff8128c4cc #13 [ffff8802788a9aa8] pci_stop_bus_device at ffffffff8128c47b #14 [ffff8802788a9ad8] pci_stop_bus_device at ffffffff8128c47b #15 [ffff8802788a9b08] pci_remove_bus_device at ffffffff8128c57a #16 [ffff8802788a9b38] remove_io at ffffffffa0361db8 [ftmod] #17 [ffff8802788a9b98] ioboard_event at ffffffffa0362130 [ftmod] #18 [ffff8802788a9c18] io_state_change at ffffffffa0365c9e [ftmod] #19 [ffff8802788a9c38] OsIoStateChange at ffffffffa0406638 [ftmod] #20 [ffff8802788a9c58] IoStateChange at ffffffffa0407a4b [ftmod] #21 [ffff8802788a9db8] CcIoPoll at ffffffffa040af64 [ftmod] #22 [ffff8802788a9e98] io_poll at ffffffffa0365b00 [ftmod] #23 [ffff8802788a9f48] kernel_thread at ffffffff8100c14a The fix that solved their problem was to deregister the usb bus first before running usb_put_dev. After running multiple tests the panic disappeared. Deregistering the bus first to prevent future readers from accessing the device before cleaning up the hcd, seemed like an appropriate way to go. I'll leave it to the experts to validate that or provide a better solution. :-) I adapted their usb_remove_hcd fix for usb_add_hcd error path too to mimic the same behaviour (a little code shuffling to handle the gotos cleanly). Signed-off-by: Don Zickus <dzickus@xxxxxxxxxx> --- drivers/usb/core/hcd.c | 20 ++++++++++---------- 1 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index bc84106..d155bbf 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2428,13 +2428,6 @@ int usb_add_hcd(struct usb_hcd *hcd, if ((retval = usb_register_bus(&hcd->self)) < 0) goto err_register_bus; - if ((rhdev = usb_alloc_dev(NULL, &hcd->self, 0)) == NULL) { - dev_err(hcd->self.controller, "unable to allocate root hub\n"); - retval = -ENOMEM; - goto err_allocate_root_hub; - } - hcd->self.root_hub = rhdev; - switch (hcd->speed) { case HCD_USB11: rhdev->speed = USB_SPEED_FULL; @@ -2450,6 +2443,13 @@ int usb_add_hcd(struct usb_hcd *hcd, goto err_set_rh_speed; } + if ((rhdev = usb_alloc_dev(NULL, &hcd->self, 0)) == NULL) { + dev_err(hcd->self.controller, "unable to allocate root hub\n"); + retval = -ENOMEM; + goto err_allocate_root_hub; + } + hcd->self.root_hub = rhdev; + /* wakeup flag init defaults to "everything works" for root hubs, * but drivers can override it in reset() if needed, along with * recording the overall controller's system wakeup capability. @@ -2541,10 +2541,10 @@ err_hcd_driver_start: free_irq(irqnum, hcd); err_request_irq: err_hcd_driver_setup: -err_set_rh_speed: - usb_put_dev(hcd->self.root_hub); err_allocate_root_hub: usb_deregister_bus(&hcd->self); +err_set_rh_speed: + usb_put_dev(hcd->self.root_hub); err_register_bus: hcd_buffer_destroy(hcd); return retval; @@ -2606,8 +2606,8 @@ void usb_remove_hcd(struct usb_hcd *hcd) free_irq(hcd->irq, hcd); } - usb_put_dev(hcd->self.root_hub); usb_deregister_bus(&hcd->self); + usb_put_dev(hcd->self.root_hub); hcd_buffer_destroy(hcd); } EXPORT_SYMBOL_GPL(usb_remove_hcd); -- 1.7.7.6 -- To unsubscribe from this list: send the line "unsubscribe linux-usb" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html