On 11/08/2013 01:30 PM, Daniel P. Berrange wrote: > On Thu, Nov 07, 2013 at 09:15:43PM +0800, Gao feng wrote: >> I met a problem where the container is blocked by seteuid/setegid, >> which is called in lxcContainerSetID, on a UP system with libvirt >> compiled with --with-fuse=yes. >> >> I looked into glibc's code, and found that setxid in glibc >> calls futex() to wait for other threads to change their >> setxid_futex to 0 (see setxid_mark_thread in glibc). >> >> The process created by the clone system call does not >> share memory with the other threads, and the content >> of its memory doesn't change until we call execl (COW). >> >> So if the process which is created by clone is called before >> the fuse thread is started, the new setxid_futex of the fuse >> thread will not be seen in this process, and it will be blocked >> forever. >> >> Maybe this problem should be fixed in glibc, but I send >> this patch as a quick fix. > > Can you show a stack trace of the threads/processes deadlocking > > Sure. The libvirt_lxc tasks: root 7922 0.0 0.1 118976 3704 ? Ssl 20:55 0:00 /usr/local/libexec/libvirt_lxc --name chx3 --console 17 --security=selinux --handshake 20 --background --veth vnet1 root 7927 0.0 0.1 53440 3072 ? 
S 20:55 0:00 /usr/local/libexec/libvirt_lxc --name chx3 --console 17 --security=selinux --handshake 20 --background --veth vnet1 the pid of fuse thread is 7925 [root@localhost ~]# ls /proc/7922/task/ 7922 7925 gdb -p 7925 (gdb) bt #0 0x00007f2d39bcb83d in read () at ../sysdeps/unix/syscall-template.S:81 #1 0x00007f2d3a5dfb72 in fuse_kern_chan_receive () from /glibc/lib/libfuse.so.2 #2 0x00007f2d3a5e0b16 in fuse_ll_receive_buf () from /glibc/lib/libfuse.so.2 #3 0x00007f2d3a5dfdd1 in fuse_session_loop () from /glibc/lib/libfuse.so.2 #4 0x00007f2d3a5d8468 in fuse_loop () from /glibc/lib/libfuse.so.2 #5 0x00007f2d3aa55691 in lxcFuseRun (opaque=opaque@entry=0x7f2d3b13a420) at lxc/lxc_fuse.c:276 #6 0x00007f2d3aaebb8e in virThreadHelper (data=<optimized out>) at util/virthreadpthread.c:161 #7 0x00007f2d39bc4f22 in start_thread (arg=0x7f2d37fbc700) at pthread_create.c:309 #8 0x00007f2d392ca6ed in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111 and the arg of start_thread is the struct pthread of fuse thread. you can see the setxid_futex of fuse pthread has been set to 0. 
(gdb) p *(struct pthread*)0x7f2d37fbc700 $1 = {{header = {tcb = 0x7f2d37fbc700, dtv = 0x7f2d3b2c9ae0, self = 0x7f2d37fbc700, multiple_threads = 1, gscope_flag = 0, sysinfo = 0, stack_guard = 5516672127090939392, pointer_guard = 9991483700321457629, vgetcpu_cache = {0, 0}, __unused1 = 0, rtld_must_xmm_save = 0, __private_tm = {0x0, 0x0, 0x0, 0x0}, __private_ss = 0x0, __unused2 = 0, rtld_savespace_sse = {{{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = { 0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, { i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}}, __padding = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}, __padding = {0x7f2d37fbc700, 0x7f2d3b2c9ae0, 0x7f2d37fbc700, 0x1, 0x0, 0x4c8f28122d8dd600, 0x8aa8e17d00c415dd, 0x0 <repeats 17 times>}}, list = {next = 0x7f2d39dd5270 <stack_used>, prev = 0x7f2d39dd5270 <stack_used>}, tid = 7925, pid = 7922, robust_prev = 0x7f2d37fbc9e0, robust_head = {list = 0x7f2d37fbc9e0, futex_offset = -32, list_op_pending = 0x0}, cleanup = 0x0, cleanup_jmp_buf = 0x7f2d37fbbe30, cancelhandling = 2, flags = 1, specific_1stblock = {{seq = 0, data = 0x0}, {seq = 0, data = 0x0}, {seq = 0, data = 0x0}, {seq = 1, data = 0x7f2d30021960}, {seq = 0, data = 0x0} <repeats 28 times>}, specific = {0x7f2d37fbca10, 0x0 <repeats 31 times>}, specific_used = true, report_events = false, user_stack = false, stopped_start = false, parent_cancelhandling = 0, lock = 0, *setxid_futex* = 0, cpuclock_offset = 1398764389412, joinid = 0x7f2d37fbc700, result = 0x0, 
schedparam = {__sched_priority = 0}, schedpolicy = 0, start_routine = 0x7f2d3aaebb60 <virThreadHelper>, arg = 0x7f2d3b2bdce0, eventbuf = {eventmask = {event_bits = {0, 0}}, eventnum = TD_ALL_EVENTS, eventdata = 0x0}, nextevent = 0x0, exc = { exception_class = 0, exception_cleanup = 0x0, private_1 = 0, private_2 = 0}, stackblock = 0x7f2d377bc000, stackblock_size = 8392704, guardsize = 4096, reported_guardsize = 4096, tpp = 0x0, res = { retrans = 0, retry = 0, options = 0, nscount = 0, nsaddr_list = {{sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, {sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, {sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}}, id = 0, dnsrch = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, defdname = '\000' <repeats 255 times>, pfcode = 0, ndots = 0, nsort = 0, ipv6_unavail = 0, unused = 0, sort_list = {{addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}}, qhook = 0x0, rhook = 0x0, res_h_errno = 0, _vcsock = 0, _flags = 0, _u = {pad = '\000' <repeats 51 times>, _ext = { nscount = 0, nsmap = {0, 0, 0}, nssocks = {0, 0, 0}, nscount6 = 0, nsinit = 0, nsaddrs = {0x0, 0x0, 0x0}, initstamp = 0}}}, end_padding = 0x7f2d37fbcff0 ""} For the cloned process 7927 gdb -p 7927 (gdb) bt #0 setxid_mark_thread (cmdp=0x7f2d3b2ef900, t=0x7f2d37fbc700) at allocatestack.c:994 #1 __nptl_setxid (cmdp=0x7f2d3b2ef900) at allocatestack.c:1086 #2 0x00007f2d392c1da1 in __setregid (rgid=rgid@entry=0, egid=egid@entry=0) at ../sysdeps/unix/sysv/linux/setregid.c:26 #3 0x00007f2d3aaf33f0 in virSetUIDGID (uid=uid@entry=0, gid=gid@entry=0, groups=groups@entry=0x0, 
ngroups=ngroups@entry=0) at util/virutil.c:1055 #4 0x00007f2d3aa51b3c in lxcContainerSetID (def=0x7f2d3b141190) at lxc/lxc_container.c:427 #5 lxcContainerChild (data=0x7fff40c4d960) at lxc/lxc_container.c:1829 #6 0x00007f2d392ca6ed in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111 the setxid_futex of fuse pthread(0x7f2d37fbc700) is still -2. (gdb) p *t $2 = {{header = {tcb = 0x7f2d37fbc700, dtv = 0x7f2d3b2c9ae0, self = 0x7f2d37fbc700, multiple_threads = 1, gscope_flag = 0, sysinfo = 0, stack_guard = 5516672127090939392, pointer_guard = 9991483700321457629, vgetcpu_cache = {0, 0}, __unused1 = 0, rtld_must_xmm_save = 0, __private_tm = {0x0, 0x0, 0x0, 0x0}, __private_ss = 0x0, __unused2 = 0, rtld_savespace_sse = {{{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = { 0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, { i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}}, __padding = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}, __padding = {0x7f2d37fbc700, 0x7f2d3b2c9ae0, 0x7f2d37fbc700, 0x1, 0x0, 0x4c8f28122d8dd600, 0x8aa8e17d00c415dd, 0x0 <repeats 17 times>}}, list = {next = 0x7f2d39dd5270 <stack_used>, prev = 0x7f2d39dd5270 <stack_used>}, tid = 7925, pid = 7922, robust_prev = 0x7f2d37fbc9e0, robust_head = {list = 0x7f2d37fbc9e0, futex_offset = -32, list_op_pending = 0x0}, cleanup = 0x0, cleanup_jmp_buf = 0x0, cancelhandling = 0, flags = 1, specific_1stblock = {{seq = 0, data = 0x0} <repeats 32 times>}, specific = {0x7f2d37fbca10, 0x0 <repeats 31 times>}, specific_used 
= false, report_events = false, user_stack = false, stopped_start = false, parent_cancelhandling = 0, lock = 0, *setxid_futex* = -2, cpuclock_offset = 0, joinid = 0x7f2d37fbc700, result = 0x0, schedparam = {__sched_priority = 0}, schedpolicy = 0, start_routine = 0x7f2d3aaebb60 <virThreadHelper>, arg = 0x7f2d3b2bdce0, eventbuf = {eventmask = {event_bits = {0, 0}}, eventnum = TD_ALL_EVENTS, eventdata = 0x0}, nextevent = 0x0, exc = {exception_class = 0, exception_cleanup = 0x0, private_1 = 0, private_2 = 0}, stackblock = 0x7f2d377bc000, stackblock_size = 8392704, guardsize = 4096, reported_guardsize = 4096, tpp = 0x0, res = {retrans = 0, retry = 0, options = 0, nscount = 0, nsaddr_list = {{sin_family = 0, sin_port = 0, sin_addr = { s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, {sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, {sin_family = 0, sin_port = 0, sin_addr = { s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}}, id = 0, dnsrch = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, defdname = '\000' <repeats 255 times>, pfcode = 0, ndots = 0, nsort = 0, ipv6_unavail = 0, unused = 0, sort_list = {{addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}, {addr = {s_addr = 0}, mask = 0}}, qhook = 0x0, rhook = 0x0, res_h_errno = 0, _vcsock = 0, _flags = 0, _u = {pad = '\000' <repeats 51 times>, _ext = {nscount = 0, nsmap = {0, 0, 0}, nssocks = {0, 0, 0}, nscount6 = 0, nsinit = 0, nsaddrs = {0x0, 0x0, 0x0}, initstamp = 0}}}, end_padding = 0x7f2d37fbcff0 ""} -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list