Hi, all,
We were running OpenStack on Ubuntu with libvirt 0.9.10. We found that libvirt monitor commands were not working well.
There were a lot of errors in libvirtd.log like this:
2013-02-07 06:07:39.000+0000: 18112: error : qemuDomainObjBeginJobInternal:773 : Timed out during operation: cannot acquire state change lock
We dug into libvirtd with strace and found that one of the threads showed only the following call:
futex(0x7f69ac0ec0ec, FUTEX_WAIT_PRIVATE, 2717, NULL
It seems this thread is waiting for a reply that never comes back, so the other threads end up waiting for it. We also saw that there is a function called virCondWaitUntil(). Is it safe for us to change the code from virCondWait() to virCondWaitUntil() to prevent such a deadlock scenario? Thanks.
The following is the output of gdb -p 'libvirt.pid', then 'thread id' and 'bt full':
#0 0x00007f69c8c1dd84 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#1 0x00007f69c9ee884a in virCondWait (c=<optimized out>, m=<optimized out>) at util/threads-pthread.c:117
ret = <optimized out>
#2 0x000000000049c749 in qemuMonitorSend (mon=0x7f69ac0ec0c0, msg=<optimized out>) at qemu/qemu_monitor.c:826
ret = -1
__func__ = "qemuMonitorSend"
__FUNCTION__ = "qemuMonitorSend"
#3 0x00000000004ac8ed in qemuMonitorJSONCommandWithFd (mon=0x7f69ac0ec0c0, cmd=0x7f6998028280, scm_fd=-1, reply=0x7f69c57829f8)
at qemu/qemu_monitor_json.c:230
ret = -1
msg = {txFD = -1, txBuffer = 0x7f69980e9b00 "{\"execute\":\"query-balloon\",\"id\":\"libvirt-1359\"}\r\n", txOffset = 49, txLength = 49,
rxBuffer = 0x0, rxLength = 0, rxObject = 0x0, finished = false, passwordHandler = 0, passwordOpaque = 0x0}
cmdstr = 0x7f69980ef2f0 "{\"execute\":\"query-balloon\",\"id\":\"libvirt-1359\"}"
id = 0x7f69980b0a20 "libvirt-1359"
exe = <optimized out>
__FUNCTION__ = "qemuMonitorJSONCommandWithFd"
__func__ = "qemuMonitorJSONCommandWithFd"
#4 0x00000000004ae794 in qemuMonitorJSONGetBalloonInfo (mon=0x7f69ac0ec0c0, currmem=0x7f69c5782a48) at qemu/qemu_monitor_json.c:1190
ret = <optimized out>
cmd = 0x7f6998028280
reply = 0x0
__FUNCTION__ = "qemuMonitorJSONGetBalloonInfo"
#5 0x0000000000457451 in qemudDomainGetInfo (dom=<optimized out>, info=0x7f69c5782b50) at qemu/qemu_driver.c:2181
priv = 0x7f69a0093b00
driver = 0x7f69b80ca8e0
vm = 0x7f69a0093370
ret = -1
err = <optimized out>
balloon = <optimized out>
__FUNCTION__ = "qemudDomainGetInfo"
#6 0x00007f69c9f63eda in virDomainGetInfo (domain=0x7f69980e3650, info=0x7f69c5782b50) at libvirt.c:4230
ret = <optimized out>
conn = <optimized out>
__func__ = "virDomainGetInfo"
__FUNCTION__ = "virDomainGetInfo"
#7 0x0000000000439bca in remoteDispatchDomainGetInfo (ret=0x7f6998000c20, args=<optimized out>, rerr=0x7f69c5782c50, client=0x157e730,
server=<optimized out>, msg=<optimized out>) at remote_dispatch.h:1640
rv = -1
tmp = {state = 1 '\001', maxMem = 2097152, memory = 0, nrVirtCpu = 0, cpuTime = 5981880000000}
dom = 0x7f69980e3650
priv = <optimized out>
#8 remoteDispatchDomainGetInfoHelper (server=<optimized out>, client=0x157e730, msg=<optimized out>, rerr=0x7f69c5782c50, args=<optimized out>,
ret=0x7f6998000c20) at remote_dispatch.h:1616
__func__ = "remoteDispatchDomainGetInfoHelper"
#9 0x00007f69c9fbb915 in virNetServerProgramDispatchCall (msg=0x1689cc0, client=0x157e730, server=0x1577c90, prog=0x15825d0)
at rpc/virnetserverprogram.c:416
ret = 0x7f6998000c20 ""
rv = -1
i = <optimized out>
arg = 0x7f6998027950 "\360e\n\230i\177"
dispatcher = 0x73de40
rerr = {code = 0, domain = 0, message = 0x0, level = 0, dom = 0x0, str1 = 0x0, str2 = 0x0, str3 = 0x0, int1 = 0, int2 = 0, net = 0x0}
#10 virNetServerProgramDispatch (prog=0x15825d0, server=0x1577c90, client=0x157e730, msg=0x1689cc0) at rpc/virnetserverprogram.c:289
ret = -1
rerr = {code = 0, domain = 0, message = 0x0, level = 0, dom = 0x0, str1 = 0x0, str2 = 0x0, str3 = 0x0, int1 = 0, int2 = 0, net = 0x0}
__func__ = "virNetServerProgramDispatch"
__FUNCTION__ = "virNetServerProgramDispatch"
#11 0x00007f69c9fb6461 in virNetServerHandleJob (jobOpaque=<optimized out>, opaque=0x1577c90) at rpc/virnetserver.c:164
srv = 0x1577c90
job = 0x155dfa0
__func__ = "virNetServerHandleJob"
#12 0x00007f69c9ee8e3e in virThreadPoolWorker (opaque=<optimized out>) at util/threadpool.c:144
data = <optimized out>
pool = 0x1577d80
cond = 0x1577de0
priority = false
job = 0x162dd20
#13 0x00007f69c9ee84e6 in virThreadHelper (data=<optimized out>) at util/threads-pthread.c:161
args = 0x0
local = {func = 0x7f69c9ee8d00 <virThreadPoolWorker>, opaque = 0x1559f90}
#14 0x00007f69c8c19e9a in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#15 0x00007f69c89474bd in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#16 0x0000000000000000 in ?? ()
No symbol table info available.
Regards,
Chun-Hung
_______________________________________________ libvirt-users mailing list libvirt-users@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvirt-users
- Prev by Date: Failed to to connect socket to '/var/run/libvirt/libvirt-sock': Connection refused
- Next by Date: libvirt v1.0.2 fails to boot LXC container, but v1.0.0 works
- Previous by thread: Failed to to connect socket to '/var/run/libvirt/libvirt-sock': Connection refused
- Next by thread: libvirt v1.0.2 fails to boot LXC container, but v1.0.0 works
- Index(es):