Re: [PATCH] LXC: make sure fuse thread start to run before we do clone

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 11/08/2013 01:30 PM, Daniel P. Berrange wrote:
> On Thu, Nov 07, 2013 at 09:15:43PM +0800, Gao feng wrote:
> >> I hit a problem where the container is blocked in seteuid/setegid,
> >> which are called in lxcContainerSetID, on a UP system with libvirt
> >> compiled with --with-fuse=yes.
>>
> >> I looked into the glibc code and found that setxid in glibc
> >> calls futex() to wait for other threads to change their
> >> setxid_futex to 0 (see setxid_mark_thread in glibc).
>>
> >> The process created by the clone system call does not
> >> share memory with the other threads, and the contents
> >> of its memory do not change until we call execl (COW).
>>
> >> So if the process created by clone runs before the
> >> fuse thread has started, the new setxid_futex value of the
> >> fuse thread will not be seen in this process, and it will be
> >> blocked forever.
>>
>> Maybe this problem should be fixed in glibc, but I send
>> this patch as a quick fix.
> 
> Can you show a stack trace of the threads/processes deadlocking?
> 
> 
Sure

the libvirt_lxc tasks
root      7922  0.0  0.1 118976  3704 ?        Ssl  20:55   0:00 /usr/local/libexec/libvirt_lxc --name chx3 --console 17 --security=selinux --handshake 20 --background --veth vnet1
root      7927  0.0  0.1  53440  3072 ?        S    20:55   0:00 /usr/local/libexec/libvirt_lxc --name chx3 --console 17 --security=selinux --handshake 20 --background --veth vnet1

the pid of fuse thread is 7925
[root@localhost ~]# ls /proc/7922/task/
7922  7925

gdb -p 7925
(gdb) bt
#0  0x00007f2d39bcb83d in read () at ../sysdeps/unix/syscall-template.S:81
#1  0x00007f2d3a5dfb72 in fuse_kern_chan_receive () from /glibc/lib/libfuse.so.2
#2  0x00007f2d3a5e0b16 in fuse_ll_receive_buf () from /glibc/lib/libfuse.so.2
#3  0x00007f2d3a5dfdd1 in fuse_session_loop () from /glibc/lib/libfuse.so.2
#4  0x00007f2d3a5d8468 in fuse_loop () from /glibc/lib/libfuse.so.2
#5  0x00007f2d3aa55691 in lxcFuseRun (opaque=opaque@entry=0x7f2d3b13a420) at lxc/lxc_fuse.c:276
#6  0x00007f2d3aaebb8e in virThreadHelper (data=<optimized out>) at util/virthreadpthread.c:161
#7  0x00007f2d39bc4f22 in start_thread (arg=0x7f2d37fbc700) at pthread_create.c:309
#8  0x00007f2d392ca6ed in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111

The arg of start_thread is the struct pthread of the fuse thread.
You can see that the setxid_futex of the fuse pthread has been set to 0.

(gdb) p *(struct pthread*)0x7f2d37fbc700
$1 = {{header = {tcb = 0x7f2d37fbc700, dtv = 0x7f2d3b2c9ae0, self = 0x7f2d37fbc700, multiple_threads = 1, gscope_flag = 0, sysinfo = 0, stack_guard = 5516672127090939392, pointer_guard =
9991483700321457629,
      vgetcpu_cache = {0, 0}, __unused1 = 0, rtld_must_xmm_save = 0, __private_tm = {0x0, 0x0, 0x0, 0x0}, __private_ss = 0x0, __unused2 = 0, rtld_savespace_sse = {{{i = {0, 0, 0, 0}}, {i = {0, 0, 0,
0}}, {i = {
              0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0,
0, 0,
              0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0,
0}}, {
            i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i =
{0, 0, 0,
              0}}, {i = {0, 0, 0, 0}}}}, __padding = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}, __padding = {0x7f2d37fbc700, 0x7f2d3b2c9ae0, 0x7f2d37fbc700, 0x1, 0x0, 0x4c8f28122d8dd600,
0x8aa8e17d00c415dd,
      0x0 <repeats 17 times>}}, list = {next = 0x7f2d39dd5270 <stack_used>, prev = 0x7f2d39dd5270 <stack_used>}, tid = 7925, pid = 7922, robust_prev = 0x7f2d37fbc9e0, robust_head = {list =
0x7f2d37fbc9e0,
    futex_offset = -32, list_op_pending = 0x0}, cleanup = 0x0, cleanup_jmp_buf = 0x7f2d37fbbe30, cancelhandling = 2, flags = 1, specific_1stblock = {{seq = 0, data = 0x0}, {seq = 0, data = 0x0}, {seq
= 0,
      data = 0x0}, {seq = 1, data = 0x7f2d30021960}, {seq = 0, data = 0x0} <repeats 28 times>}, specific = {0x7f2d37fbca10, 0x0 <repeats 31 times>}, specific_used = true, report_events = false,
  user_stack = false, stopped_start = false, parent_cancelhandling = 0, lock = 0, *setxid_futex* = 0, cpuclock_offset = 1398764389412, joinid = 0x7f2d37fbc700, result = 0x0, schedparam =
{__sched_priority = 0},
  schedpolicy = 0, start_routine = 0x7f2d3aaebb60 <virThreadHelper>, arg = 0x7f2d3b2bdce0, eventbuf = {eventmask = {event_bits = {0, 0}}, eventnum = TD_ALL_EVENTS, eventdata = 0x0}, nextevent = 0x0,
exc = {
    exception_class = 0, exception_cleanup = 0x0, private_1 = 0, private_2 = 0}, stackblock = 0x7f2d377bc000, stackblock_size = 8392704, guardsize = 4096, reported_guardsize = 4096, tpp = 0x0, res = {
    retrans = 0, retry = 0, options = 0, nscount = 0, nsaddr_list = {{sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, {sin_family = 0, sin_port = 0,
        sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, {sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}}, id = 0, dnsrch = {0x0,
0x0,
      0x0, 0x0, 0x0, 0x0, 0x0}, defdname = '\000' <repeats 255 times>, pfcode = 0, ndots = 0, nsort = 0, ipv6_unavail = 0, unused = 0, sort_list = {{addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0},
        mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr
= 0},
        mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}}, qhook = 0x0, rhook = 0x0, res_h_errno = 0, _vcsock = 0, _flags = 0, _u = {pad = '\000' <repeats 51 times>, _ext = {
        nscount = 0, nsmap = {0, 0, 0}, nssocks = {0, 0, 0}, nscount6 = 0, nsinit = 0, nsaddrs = {0x0, 0x0, 0x0}, initstamp = 0}}}, end_padding = 0x7f2d37fbcff0 ""}

For the cloned process 7927
gdb -p 7927
(gdb) bt
#0  setxid_mark_thread (cmdp=0x7f2d3b2ef900, t=0x7f2d37fbc700) at allocatestack.c:994
#1  __nptl_setxid (cmdp=0x7f2d3b2ef900) at allocatestack.c:1086
#2  0x00007f2d392c1da1 in __setregid (rgid=rgid@entry=0, egid=egid@entry=0) at ../sysdeps/unix/sysv/linux/setregid.c:26
#3  0x00007f2d3aaf33f0 in virSetUIDGID (uid=uid@entry=0, gid=gid@entry=0, groups=groups@entry=0x0, ngroups=ngroups@entry=0) at util/virutil.c:1055
#4  0x00007f2d3aa51b3c in lxcContainerSetID (def=0x7f2d3b141190) at lxc/lxc_container.c:427
#5  lxcContainerChild (data=0x7fff40c4d960) at lxc/lxc_container.c:1829
#6  0x00007f2d392ca6ed in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111

In the cloned process, the setxid_futex of the fuse pthread (0x7f2d37fbc700) is still -2.
(gdb) p *t
$2 = {{header = {tcb = 0x7f2d37fbc700, dtv = 0x7f2d3b2c9ae0, self = 0x7f2d37fbc700, multiple_threads = 1, gscope_flag = 0, sysinfo = 0, stack_guard = 5516672127090939392, pointer_guard =
9991483700321457629,
      vgetcpu_cache = {0, 0}, __unused1 = 0, rtld_must_xmm_save = 0, __private_tm = {0x0, 0x0, 0x0, 0x0}, __private_ss = 0x0, __unused2 = 0, rtld_savespace_sse = {{{i = {0, 0, 0, 0}}, {i = {0, 0, 0,
0}}, {i = {
              0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0,
0, 0,
              0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0,
0}}, {
            i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i =
{0, 0, 0,
              0}}, {i = {0, 0, 0, 0}}}}, __padding = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}, __padding = {0x7f2d37fbc700, 0x7f2d3b2c9ae0, 0x7f2d37fbc700, 0x1, 0x0, 0x4c8f28122d8dd600,
0x8aa8e17d00c415dd,
      0x0 <repeats 17 times>}}, list = {next = 0x7f2d39dd5270 <stack_used>, prev = 0x7f2d39dd5270 <stack_used>}, tid = 7925, pid = 7922, robust_prev = 0x7f2d37fbc9e0, robust_head = {list =
0x7f2d37fbc9e0,
    futex_offset = -32, list_op_pending = 0x0}, cleanup = 0x0, cleanup_jmp_buf = 0x0, cancelhandling = 0, flags = 1, specific_1stblock = {{seq = 0, data = 0x0} <repeats 32 times>}, specific =
{0x7f2d37fbca10,
    0x0 <repeats 31 times>}, specific_used = false, report_events = false, user_stack = false, stopped_start = false, parent_cancelhandling = 0, lock = 0, *setxid_futex* = -2, cpuclock_offset = 0,
  joinid = 0x7f2d37fbc700, result = 0x0, schedparam = {__sched_priority = 0}, schedpolicy = 0, start_routine = 0x7f2d3aaebb60 <virThreadHelper>, arg = 0x7f2d3b2bdce0, eventbuf = {eventmask =
{event_bits = {0,
        0}}, eventnum = TD_ALL_EVENTS, eventdata = 0x0}, nextevent = 0x0, exc = {exception_class = 0, exception_cleanup = 0x0, private_1 = 0, private_2 = 0}, stackblock = 0x7f2d377bc000,
  stackblock_size = 8392704, guardsize = 4096, reported_guardsize = 4096, tpp = 0x0, res = {retrans = 0, retry = 0, options = 0, nscount = 0, nsaddr_list = {{sin_family = 0, sin_port = 0, sin_addr = {
          s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, {sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, {sin_family = 0, sin_port = 0,
sin_addr = {
          s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}}, id = 0, dnsrch = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, defdname = '\000' <repeats 255 times>, pfcode = 0, ndots = 0, nsort = 0,
    ipv6_unavail = 0, unused = 0, sort_list = {{addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0},
        mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}}, qhook = 0x0,
    rhook = 0x0, res_h_errno = 0, _vcsock = 0, _flags = 0, _u = {pad = '\000' <repeats 51 times>, _ext = {nscount = 0, nsmap = {0, 0, 0}, nssocks = {0, 0, 0}, nscount6 = 0, nsinit = 0, nsaddrs = {0x0,
0x0,
          0x0}, initstamp = 0}}}, end_padding = 0x7f2d37fbcff0 ""}

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list




[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]