Re: [PATCH] usb/core: Fix race condition when removing EHCI PCI devices

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Sep 13, 2012 at 04:31:34PM -0400, Alan Stern wrote:
> On Thu, 13 Sep 2012, Don Zickus wrote:
> 
> > Hi Alan,
> > 
> > I adapted your patch to our 2.6.32 tree and the customer tested it without
> > success.  The output panic is attached below.  I will work on getting a
> > machine with the latest kernel to reproduce this problem so I don't waste
> > your time chasing something that might be fixed upstream.
> > 
> > But if you could take a quick glance at the panic below to see if anything
> > comes to mind I would appreciate it.
> 
> > The test failed on the first surprise removal of PCI devices.  Last gasp
> > from the console is posted below.  I would guess the faulting process was
> > reading a file under /proc/bus/usb.  I should have a dump if more info is
> > needed.
> 
> > ehci_hcd 0000:2c:00.0: HC died; cleaning up
> > ehci_hcd 0000:2c:00.0: force halt; handhake ffffc90000654024 00004000
> > 00004000 -> -19
> > ehci_hcd 0000:2c:00.0: HC died; cleaning up
> > ehci_hcd 0000:2c:00.0: remove, state 0
> > usb usb1: USB disconnect, device number 1
> > usb 1-1: USB disconnect, device number 2
> > usb 1-1.1: USB disconnect, device number 3
> > hub 4-1:1.0: unable to enumerate USB device on port 3
> > usb 1-1.3: USB disconnect, device number 4
> > usb 1-1.6: USB disconnect, device number 5
> > usb 1-1.6.1: USB disconnect, device number 6
> > ehci_hcd 0000:2c:00.0: USB bus 1 deregistered
> > hub 4-1:1.0: unable to enumerate USB device on port 3
> > general protection fault: 0000 [#1] SMP 
> > last sysfs file:
> > /sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:01.0/0000:3d:00.0/0000:3e:01.0/0000:66:00.0/usb3/3-1/3-1.6/3-1.6.1/3-1.6.1:1.2/input/input12/event12/uevent
> > CPU 0 
> > Modules linked in: autofs4 sunrpc configfs cachefiles fscache(T) bonding
> > 8021q garp stp llc vhost_net macvtap macvlan tun uinput ipmi_devintf
> > ftmod(P)(U) ipmi_msghandler sg matroxfb(U) fosil(U) ext4 mbcache jbd2
> > raid1 sr_mod cdrom sd_mod(U) crc_t10dif usb_storage mpt2sas(U)
> > scsi_hbas(U) scsi_transport_sas raid_class igb(U) dca dm_mirror
> > dm_region_hash dm_log dm_mod ipv6 cxgb4 cxgb3 mdio libiscsi_tcp libiscsi
> > scsi_transport_iscsi [last unloaded: scsi_wait_scan]
> > 
> > Pid: 32752, comm: cat Tainted: P           ---------------  T
> > 2.6.32-279.el6.bz849188.test01.x86_64 #1 Stratus ftServer 2700/G7LAY
> > RIP: 0010:[<ffffffff813b8167>]  [<ffffffff813b8167>] usb_device_dump+0x87/0xa70
> 
> It would help to know what source statement that memory address
> corresponds to.  Offhand I can't see any remaining races between
> usb_device_dump() and usb_disconnect().

I have added some in-depth analysis from our customer.


The problem is that the failing routine was called with a pointer to usbdev in
memory that has already been freed and overwritten with the pool poison pattern.
This causes an access violation on line 502:

470 /* This is a recursive function. Parameters:
471  * buffer - the user-space buffer to write data into
472  * nbytes - the maximum number of bytes to write
473  * skip_bytes - the number of bytes to skip before writing anything
474  * file_offset - the offset into the devices file on completion
475  * The caller must own the device lock.
476  */
477 static ssize_t usb_device_dump(char __user **buffer, size_t *nbytes,
478                                loff_t *skip_bytes, loff_t *file_offset,
479                                struct usb_device *usbdev, struct usb_bus *bus,
480                                int level, int index, int count)
481 {
482         int chix;
483         int ret, cnt = 0;
484         int parent_devnum = 0;
485         char *pages_start, *data_end, *speed;
486         unsigned int length;
487         ssize_t total_written = 0;
488 
489         /* don't bother with anything else if we're not writing any data */
490         if (*nbytes <= 0)
491                 return 0;
492 
493         if (level > MAX_TOPO_LEVEL)
494                 return 0;
495         /* allocate 2^1 pages = 8K (on i386);
496          * should be more than enough for one device */
497         pages_start = (char *)__get_free_pages(GFP_NOIO, 1);
498         if (!pages_start)
499                 return -ENOMEM;
500 
501         if (usbdev->parent && usbdev->parent->devnum != -1)
502                 parent_devnum = usbdev->parent->devnum;
503         /*
504          * So the root hub's parent is 0 and any device that is
505          * plugged into the root hub has a parent of 0.
506          */
507         switch (usbdev->speed) {

crash> hex
output radix: 16 (hex)
crash> sym ffffffff813b8167
ffffffff813b8167 (t) usb_device_dump+0x87 ../debug/kernel-2.6.32-279.el6/linux-2.6.32-279.el6.bz849188.test01.x86_64/drivers/usb/core/devices.c: 501

crash> struct -o usb_device | grep parent
   [0x38] struct usb_device *parent;

Annotated listing...
crash> dis usb_device_dump | cut -f 2- -d' '
<usb_device_dump>:	push   %rbp
<usb_device_dump+0x1>:	mov    %rsp,%rbp
<usb_device_dump+0x4>:	push   %r15
<usb_device_dump+0x6>:	push   %r14
<usb_device_dump+0x8>:	push   %r13
<usb_device_dump+0xa>:	push   %r12
<usb_device_dump+0xc>:	push   %rbx
<usb_device_dump+0xd>:	sub    $0xa8,%rsp
<usb_device_dump+0x14>:	nopl   0x0(%rax,%rax,1)
<usb_device_dump+0x19>:	mov    %rdi,-0x68(%rbp)		<<< **buffer
<usb_device_dump+0x1d>:	mov    %rsi,-0x40(%rbp)		<<< *nbytes
<usb_device_dump+0x21>:	mov    %rdx,-0x60(%rbp)		<<< *skip_bytes
<usb_device_dump+0x25>:	mov    %rcx,-0x70(%rbp)		<<< *file_offset
<usb_device_dump+0x29>:	mov    %r8,-0x50(%rbp)		<<< *usbdev
<usb_device_dump+0x2d>:	mov    %r9,-0x58(%rbp)		<<< *bus
<usb_device_dump+0x31>:	cmpq   $0x0,(%rsi)		<<< #490
<usb_device_dump+0x35>:	je     0xffffffff813b811d <usb_device_dump+0x3d>
<usb_device_dump+0x37>:	cmpl   $0x6,0x10(%rbp)		<<< #493
<usb_device_dump+0x3b>:	jle    0xffffffff813b8134 <usb_device_dump+0x54>
<usb_device_dump+0x3d>:	xor    %ebx,%ebx
<usb_device_dump+0x3f>:	add    $0xa8,%rsp
<usb_device_dump+0x46>:	mov    %rbx,%rax
<usb_device_dump+0x49>:	pop    %rbx
<usb_device_dump+0x4a>:	pop    %r12
<usb_device_dump+0x4c>:	pop    %r13
<usb_device_dump+0x4e>:	pop    %r14
<usb_device_dump+0x50>:	pop    %r15
<usb_device_dump+0x52>:	leaveq 
<usb_device_dump+0x53>:	retq   
<usb_device_dump+0x54>:	mov    $0x1,%esi		<<< #497
<usb_device_dump+0x59>:	mov    $0x10,%edi
<usb_device_dump+0x5e>:	mov    $0xfffffffffffffff4,%rbx <<< -ENOMEM
<usb_device_dump+0x65>:	callq  0xffffffff81124120 <__get_free_pages>
<usb_device_dump+0x6a>:	test   %rax,%rax		<<< #498
<usb_device_dump+0x6d>:	mov    %rax,-0x98(%rbp)		<<< pages_start
<usb_device_dump+0x74>:	je     0xffffffff813b811f <usb_device_dump+0x3f>
<usb_device_dump+0x76>:	mov    -0x50(%rbp),%rdx		<<< usbdev
<usb_device_dump+0x7a>:	mov    0x38(%rdx),%rax		<<< usbdev->parent
<usb_device_dump+0x7e>:	test   %rax,%rax		<<< #501
<usb_device_dump+0x81>:	je     0xffffffff813b8476 <usb_device_dump+0x396>
<usb_device_dump+0x87>:	mov    (%rax),%r8d		<<< #502
<usb_device_dump+0x8a>:	cmp    $0xffffffff,%r8d
<usb_device_dump+0x8e>:	je     0xffffffff813b8476 <usb_device_dump+0x396>

crash> bt
PID: 32752  TASK: ffff8801788a6a40  CPU: 0   COMMAND: "cat"
 #0 [ffff88014bfebc70] die at ffffffff8100f22b
 #1 [ffff88014bfebca0] do_general_protection at ffffffff815018c2
 #2 [ffff88014bfebcd0] general_protection at ffffffff81501095
    [exception RIP: usb_device_dump+0x87]
    RIP: ffffffff813b8167  RSP: ffff88014bfebd88  RFLAGS: 00010202
    RAX: 6b6b6b6b6b6b6b6b  RBX: fffffffffffffff4  RCX: 0000000000000000
    RDX: ffff88017d2eeba8  RSI: 0000000000000000  RDI: ffffea00052a7978
    RBP: ffff88014bfebe58   R8: 0000000000000000   R9: 000000000004b828
    R10: 0000000000000000  R11: 0000000000000030  R12: ffff880176c88cd8
    R13: 000000000000133a  R14: ffff88014bfebeb0  R15: ffff88014bfebe98
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #3 [ffff88014bfebd80] usb_device_dump at ffffffff813b814a
 #4 [ffff88014bfebe60] usb_device_read at ffffffff813b8c70
 #5 [ffff88014bfebef0] vfs_read at ffffffff8117cdf5
 #6 [ffff88014bfebf30] sys_read at ffffffff8117cfb1
 #7 [ffff88014bfebf80] tracesys at ffffffff8100b2c8 (via system_call)
    RIP: 0000003930cda360  RSP: 00007fffc8369c08  RFLAGS: 00000246
    RAX: ffffffffffffffda  RBX: ffffffff8100b2c8  RCX: ffffffffffffffff
    RDX: 0000000000008000  RSI: 0000000002284000  RDI: 0000000000000005
    RBP: 0000000002284000   R8: 0000003930f8dee8   R9: 0000000000000001
    R10: 0000000000008fff  R11: 0000000000000246  R12: ffffffffffff8000
    R13: 0000000000000005  R14: 0000000000008000  R15: 0000000002284000
    ORIG_RAX: 0000000000000000  CS: 0033  SS: 002b

Here's *usbdev based upon address in RDX...

crash> struct usb_device 0xffff88017d2eeba8
struct usb_device {
  devnum = 0x6b6b6b6b, 
  devpath = "kkkkkkkkkkkkkkkk", 
  route = 0x6b6b6b6b, 
  state = 1802201963, 
  speed = 1802201963, 
  tt = 0x6b6b6b6b6b6b6b6b, 
  ttport = 0x6b6b6b6b, 
  toggle = {0x6b6b6b6b, 0x6b6b6b6b}, 
  parent = 0x6b6b6b6b6b6b6b6b, 
  bus = 0x6b6b6b6b6b6b6b6b, 
  ep0 = {
    desc = {
      bLength = 0x6b, 
      bDescriptorType = 0x6b, 
      bEndpointAddress = 0x6b, 
      bmAttributes = 0x6b, 
      wMaxPacketSize = 0x6b6b, 
      bInterval = 0x6b, 
      bRefresh = 0x6b, 
      bSynchAddress = 0x6b
    }, 
    urb_list = {
      next = 0x6b6b6b6b6b6b6b6b, 
      prev = 0x6b6b6b6b6b6b6b6b
    }, 
    hcpriv = 0x6b6b6b6b6b6b6b6b, 
    ep_dev = 0x6b6b6b6b6b6b6b6b, 
    ss_ep_comp = 0x6b6b6b6b6b6b6b6b, 
    extra = 0x6b6b6b6b6b6b6b6b <Address 0x6b6b6b6b6b6b6b6b out of bounds>, 
    extralen = 0x6b6b6b6b, 
    enabled = 0x6b6b6b6b
  }, 
  dev = {
    parent = 0x6b6b6b6b6b6b6b6b, 
    p = 0x6b6b6b6b6b6b6b6b, 
    kobj = {
      name = 0x6b6b6b6b6b6b6b6b <Address 0x6b6b6b6b6b6b6b6b out of bounds>, 
      entry = {
        next = 0x6b6b6b6b6b6b6b6b, 
        prev = 0x6b6b6b6b6b6b6b6b
      }, 
      parent = 0x6b6b6b6b6b6b6b6b, 
      kset = 0x6b6b6b6b6b6b6b6b, 
      ktype = 0x6b6b6b6b6b6b6b6b, 
      sd = 0x6b6b6b6b6b6b6b6b, 
      kref = {
        refcount = {
          counter = 0x6b6b6b6b
        }
      }, 
      state_initialized = 0x1, 
      state_in_sysfs = 0x1, 
      state_add_uevent_sent = 0x0, 
      state_remove_uevent_sent = 0x1, 
      uevent_suppress = 0x0
    }, 
    init_name = 0x6b6b6b6b6b6b6b6b <Address 0x6b6b6b6b6b6b6b6b out of bounds>, 
    type = 0x6b6b6b6b6b6b6b6b, 

Thanks,
Don
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Media]     [Linux Input]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Old Linux USB Devel Archive]

  Powered by Linux