On Thu, Sep 13, 2012 at 04:31:34PM -0400, Alan Stern wrote: > On Thu, 13 Sep 2012, Don Zickus wrote: > > > Hi Alan, > > > > I adapted your patch to our 2.6.32 tree and the customer tested it without > > success. The output panic is attached below. I will work on getting a > > machine with the latest kernel to reproduce this problem so I don't waste > > your time chasing something that might be fixed upstream. > > > > But if you could take a quick glance at the panic below to see if anything > > comes to mind I would appreciate it. > > > The test failed on the first surprise removal of PCI devices. Last gasp > > from the console is posted below. I would guess the faulting process was > > reading a file under /proc/bus/usb. I should have a dump if more info is > > needed. > > > ehci_hcd 0000:2c:00.0: HC died; cleaning up > > ehci_hcd 0000:2c:00.0: force halt; handhake ffffc90000654024 00004000 > > 00004000 -> -19 > > ehci_hcd 0000:2c:00.0: HC died; cleaning up > > ehci_hcd 0000:2c:00.0: remove, state 0 > > usb usb1: USB disconnect, device number 1 > > usb 1-1: USB disconnect, device number 2 > > usb 1-1.1: USB disconnect, device number 3 > > hub 4-1:1.0: unable to enumerate USB device on port 3 > > usb 1-1.3: USB disconnect, device number 4 > > usb 1-1.6: USB disconnect, device number 5 > > usb 1-1.6.1: USB disconnect, device number 6 > > ehci_hcd 0000:2c:00.0: USB bus 1 deregistered > > hub 4-1:1.0: unable to enumerate USB device on port 3 > > general protection fault: 0000 [#1] SMP > > last sysfs file: > > /sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:01.0/0000:3d:00.0/0000:3e:01.0/0000:66:00.0/usb3/3-1/3-1.6/3-1.6.1/3-1.6.1:1.2/input/input12/event12/uevent > > CPU 0 > > Modules linked in: autofs4 sunrpc configfs cachefiles fscache(T) bonding > > 8021q garp stp llc vhost_net macvtap macvlan tun uinput ipmi_devintf > > ftmod(P)(U) ipmi_msghandler sg matroxfb(U) fosil(U) ext4 mbcache jbd2 > > raid1 sr_mod cdrom sd_mod(U) crc_t10dif usb_storage mpt2sas(U) > > scsi_hbas(U) scsi_transport_sas raid_class igb(U) dca dm_mirror > > dm_region_hash dm_log dm_mod ipv6 cxgb4 cxgb3 mdio libiscsi_tcp libiscsi > > scsi_transport_iscsi [last unloaded: scsi_wait_scan] > > > > Pid: 32752, comm: cat Tainted: P --------------- T > > 2.6.32-279.el6.bz849188.test01.x86_64 #1 Stratus ftServer 2700/G7LAY > > RIP: 0010:[<ffffffff813b8167>] [<ffffffff813b8167>] usb_device_dump+0x87/0xa70 > > It would help to know what source statement that memory address > corresponds to. Offhand I can't see any remaining races between > usb_device_dump() and usb_disconnect(). I have added some in-depth analysis from our customer. The problem is that the failing routine was called with a pointer to usbdev in memory that has already been freed and overwritten with the pool poison pattern. This causes an access violation on line 502: 470 /* This is a recursive function. Parameters: 471 * buffer - the user-space buffer to write data into 472 * nbytes - the maximum number of bytes to write 473 * skip_bytes - the number of bytes to skip before writing anything 474 * file_offset - the offset into the devices file on completion 475 * The caller must own the device lock. 476 */ 477 static ssize_t usb_device_dump(char __user **buffer, size_t *nbytes, 478 loff_t *skip_bytes, loff_t *file_offset, 479 struct usb_device *usbdev, struct usb_bus *bus, 480 int level, int index, int count) 481 { 482 int chix; 483 int ret, cnt = 0; 484 int parent_devnum = 0; 485 char *pages_start, *data_end, *speed; 486 unsigned int length; 487 ssize_t total_written = 0; 488 489 /* don't bother with anything else if we're not writing any data */ 490 if (*nbytes <= 0) 491 return 0; 492 493 if (level > MAX_TOPO_LEVEL) 494 return 0; 495 /* allocate 2^1 pages = 8K (on i386); 496 * should be more than enough for one device */ 497 pages_start = (char *)__get_free_pages(GFP_NOIO, 1); 498 if (!pages_start) 499 return -ENOMEM; 500 501 if (usbdev->parent && usbdev->parent->devnum != -1) 502 parent_devnum = usbdev->parent->devnum; 503 /* 504 * So the root hub's parent is 0 and any device that is 505 * plugged into the root hub has a parent of 0. 506 */ 507 switch (usbdev->speed) { crash> hex output radix: 16 (hex) crash> sym ffffffff813b8167 ffffffff813b8167 (t) usb_device_dump+0x87 ../debug/kernel-2.6.32-279.el6/linux-2.6.32-279.el6.bz849188.test01.x86_64/drivers/usb/core/devices.c: 501 crash> struct -o usb_device | grep parent [0x38] struct usb_device *parent; Annotated listing... crash> dis usb_device_dump | cut -f 2- -d' ' <usb_device_dump>: push %rbp <usb_device_dump+0x1>: mov %rsp,%rbp <usb_device_dump+0x4>: push %r15 <usb_device_dump+0x6>: push %r14 <usb_device_dump+0x8>: push %r13 <usb_device_dump+0xa>: push %r12 <usb_device_dump+0xc>: push %rbx <usb_device_dump+0xd>: sub $0xa8,%rsp <usb_device_dump+0x14>: nopl 0x0(%rax,%rax,1) <usb_device_dump+0x19>: mov %rdi,-0x68(%rbp) <<< **buffer <usb_device_dump+0x1d>: mov %rsi,-0x40(%rbp) <<< *nbytes <usb_device_dump+0x21>: mov %rdx,-0x60(%rbp) <<< *skip_bytes <usb_device_dump+0x25>: mov %rcx,-0x70(%rbp) <<< *file_offset <usb_device_dump+0x29>: mov %r8,-0x50(%rbp) <<< *usbdev <usb_device_dump+0x2d>: mov %r9,-0x58(%rbp) <<< *bus <usb_device_dump+0x31>: cmpq $0x0,(%rsi) <<< #490 <usb_device_dump+0x35>: je 0xffffffff813b811d <usb_device_dump+0x3d> <usb_device_dump+0x37>: cmpl $0x6,0x10(%rbp) <<< #493 <usb_device_dump+0x3b>: jle 0xffffffff813b8134 <usb_device_dump+0x54> <usb_device_dump+0x3d>: xor %ebx,%ebx <usb_device_dump+0x3f>: add $0xa8,%rsp <usb_device_dump+0x46>: mov %rbx,%rax <usb_device_dump+0x49>: pop %rbx <usb_device_dump+0x4a>: pop %r12 <usb_device_dump+0x4c>: pop %r13 <usb_device_dump+0x4e>: pop %r14 <usb_device_dump+0x50>: pop %r15 <usb_device_dump+0x52>: leaveq <usb_device_dump+0x53>: retq <usb_device_dump+0x54>: mov $0x1,%esi <<< #497 <usb_device_dump+0x59>: mov $0x10,%edi <usb_device_dump+0x5e>: mov $0xfffffffffffffff4,%rbx <<< -ENOMEM <usb_device_dump+0x65>: callq 0xffffffff81124120 <__get_free_pages> <usb_device_dump+0x6a>: test %rax,%rax <<< #498 <usb_device_dump+0x6d>: mov %rax,-0x98(%rbp) <<< pages_start <usb_device_dump+0x74>: je 0xffffffff813b811f <usb_device_dump+0x3f> <usb_device_dump+0x76>: mov -0x50(%rbp),%rdx <<< usbdev <usb_device_dump+0x7a>: mov 0x38(%rdx),%rax <<< usbdev->parent <usb_device_dump+0x7e>: test %rax,%rax <<< #501 <usb_device_dump+0x81>: je 0xffffffff813b8476 <usb_device_dump+0x396> <usb_device_dump+0x87>: mov (%rax),%r8d <<< #502 <usb_device_dump+0x8a>: cmp $0xffffffff,%r8d <usb_device_dump+0x8e>: je 0xffffffff813b8476 <usb_device_dump+0x396> crash> bt PID: 32752 TASK: ffff8801788a6a40 CPU: 0 COMMAND: "cat" #0 [ffff88014bfebc70] die at ffffffff8100f22b #1 [ffff88014bfebca0] do_general_protection at ffffffff815018c2 #2 [ffff88014bfebcd0] general_protection at ffffffff81501095 [exception RIP: usb_device_dump+0x87] RIP: ffffffff813b8167 RSP: ffff88014bfebd88 RFLAGS: 00010202 RAX: 6b6b6b6b6b6b6b6b RBX: fffffffffffffff4 RCX: 0000000000000000 RDX: ffff88017d2eeba8 RSI: 0000000000000000 RDI: ffffea00052a7978 RBP: ffff88014bfebe58 R8: 0000000000000000 R9: 000000000004b828 R10: 0000000000000000 R11: 0000000000000030 R12: ffff880176c88cd8 R13: 000000000000133a R14: ffff88014bfebeb0 R15: ffff88014bfebe98 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #3 [ffff88014bfebd80] usb_device_dump at ffffffff813b814a #4 [ffff88014bfebe60] usb_device_read at ffffffff813b8c70 #5 [ffff88014bfebef0] vfs_read at ffffffff8117cdf5 #6 [ffff88014bfebf30] sys_read at ffffffff8117cfb1 #7 [ffff88014bfebf80] tracesys at ffffffff8100b2c8 (via system_call) RIP: 0000003930cda360 RSP: 00007fffc8369c08 RFLAGS: 00000246 RAX: ffffffffffffffda RBX: ffffffff8100b2c8 RCX: ffffffffffffffff RDX: 0000000000008000 RSI: 0000000002284000 RDI: 0000000000000005 RBP: 0000000002284000 R8: 0000003930f8dee8 R9: 0000000000000001 R10: 0000000000008fff R11: 0000000000000246 R12: ffffffffffff8000 R13: 0000000000000005 R14: 0000000000008000 R15: 0000000002284000 ORIG_RAX: 0000000000000000 CS: 0033 SS: 002b Here's *usbdev based upon address in RDX... crash> struct usb_device 0xffff88017d2eeba8 struct usb_device { devnum = 0x6b6b6b6b, devpath = "kkkkkkkkkkkkkkkk", route = 0x6b6b6b6b, state = 1802201963, speed = 1802201963, tt = 0x6b6b6b6b6b6b6b6b, ttport = 0x6b6b6b6b, toggle = {0x6b6b6b6b, 0x6b6b6b6b}, parent = 0x6b6b6b6b6b6b6b6b, bus = 0x6b6b6b6b6b6b6b6b, ep0 = { desc = { bLength = 0x6b, bDescriptorType = 0x6b, bEndpointAddress = 0x6b, bmAttributes = 0x6b, wMaxPacketSize = 0x6b6b, bInterval = 0x6b, bRefresh = 0x6b, bSynchAddress = 0x6b }, urb_list = { next = 0x6b6b6b6b6b6b6b6b, prev = 0x6b6b6b6b6b6b6b6b }, hcpriv = 0x6b6b6b6b6b6b6b6b, ep_dev = 0x6b6b6b6b6b6b6b6b, ss_ep_comp = 0x6b6b6b6b6b6b6b6b, extra = 0x6b6b6b6b6b6b6b6b <Address 0x6b6b6b6b6b6b6b6b out of bounds>, extralen = 0x6b6b6b6b, enabled = 0x6b6b6b6b }, dev = { parent = 0x6b6b6b6b6b6b6b6b, p = 0x6b6b6b6b6b6b6b6b, kobj = { name = 0x6b6b6b6b6b6b6b6b <Address 0x6b6b6b6b6b6b6b6b out of bounds>, entry = { next = 0x6b6b6b6b6b6b6b6b, prev = 0x6b6b6b6b6b6b6b6b }, parent = 0x6b6b6b6b6b6b6b6b, kset = 0x6b6b6b6b6b6b6b6b, ktype = 0x6b6b6b6b6b6b6b6b, sd = 0x6b6b6b6b6b6b6b6b, kref = { refcount = { counter = 0x6b6b6b6b } }, state_initialized = 0x1, state_in_sysfs = 0x1, state_add_uevent_sent = 0x0, state_remove_uevent_sent = 0x1, uevent_suppress = 0x0 }, init_name = 0x6b6b6b6b6b6b6b6b <Address 0x6b6b6b6b6b6b6b6b out of bounds>, type = 0x6b6b6b6b6b6b6b6b, Thanks, Don -- To unsubscribe from this list: send the line "unsubscribe linux-usb" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html