Modify GetAllStats to generate a long enough pause in order to send a SIGTERM to libvirtd while a client connection is still being processed. In order to "set things up": 1. In one terminal window, as root, run libvirtd (built from a local branch with these patches applied) under the debugger, e.g.: # ./run gdb src/libvirtd once running, type a 'c' (i.e. continue) and <return> 2. Start a domain (or have one running with the current libvirtd) virsh start $domain 3. Prepare a domstats command for that domain (but don't yet hit <return> in order to run it): virsh domstats $domain 4. Prepare a kill command for the running libvirtd, using the PID of the lt-libvirtd process (30087 in the listing below), e.g.: jferlan 4143 4107 0 09:51 pts/1 00:00:00 vim +1396 src/libvirtd.c root 30054 21195 6 11:17 pts/8 00:00:01 gdb /home/jferlan/git/libvirt.work/src/.libs/lt-libvirtd root 30087 30054 7 11:17 pts/8 00:00:01 /home/jferlan/git/libvirt.work/src/.libs/lt-libvirtd root 30385 19861 0 11:17 pts/17 00:00:00 grep --color=auto libvirtd but again don't hit <return> yet. 5. Align your stars perfectly now... a. Hit <return> on your domstats command b. Swap to the kill command window and hit <return> This should cause the libvirtd debug window to stop, but since you already typed 'c' it'll continue at least briefly, for example: ... [Thread 0x7fffc3231700 (LWP 30374) exited] Detaching after fork from child process 30376. Detaching after fork from child process 30377. Detaching after fork from child process 30378. [Thread 0x7fffc4445700 (LWP 30106) exited] c 2018-01-10 16:18:12.962+0000: 30094: info : libvirt version: 4.0.0 2018-01-10 16:18:12.962+0000: 30094: info : hostname: unknown4ceb42c824f4.attlocal.net 2018-01-10 16:18:12.962+0000: 30094: warning : qemuConnectGetAllDomainStats:20265 : k = -5340232226128654848 Thread 1 "lt-libvirtd" received signal SIGTERM, Terminated. 0x00007ffff3ae6d2d in poll () from /lib64/libc.so.6 ... (gdb) c Continuing. 
[Thread 0x7fffc5c48700 (LWP 30103) exited] [Thread 0x7fffc5447700 (LWP 30104) exited] [Thread 0x7fffc4c46700 (LWP 30105) exited] [Thread 0x7fffc6449700 (LWP 30102) exited] [Thread 0x7fffc6c4a700 (LWP 30101) exited] [Thread 0x7fffe3b57700 (LWP 30097) exited] [Thread 0x7fffe4358700 (LWP 30096) exited] [Thread 0x7fffe2354700 (LWP 30100) exited] [Thread 0x7fffe3356700 (LWP 30098) exited] [Thread 0x7fffe2b55700 (LWP 30099) exited] [Thread 0x7fffe535a700 (LWP 30094) exited] [Thread 0x7fffe5b5b700 (LWP 30093) exited] [Thread 0x7fffe635c700 (LWP 30092) exited] [Thread 0x7fffe6b5d700 (LWP 30091) exited] 2018-01-10 16:18:25.451+0000: 30095: warning : qemuConnectGetAllDomainStats:20265 : k = -5340232226128654848 [Thread 0x7fffe4b59700 (LWP 30095) exited] [Thread 0x7fffc3a32700 (LWP 30187) exited] [Inferior 1 (process 30087) exited normally] (gdb) c The program is not being run. (gdb) quit The virsh domstats window will "close" as follows: error: Disconnected from qemu:///system due to end of file error: End of file while reading data: Input/output error If something's wrong, then the libvirtd window may not exit those final two threads in which case you could interrupt it (^c) and check the threads (thread apply all bt) which will probably show some sort of hang... My testing shows that the hang no longer occurs with all the previous patches applied. 
The subsequent patch calling virHashRemoveAll from virNetDaemonClose does not seem to be necessary, although I suppose it cannot hurt as the same essential functionality occurs during the Dispose function Signed-off-by: John Ferlan <jferlan@xxxxxxxxxx> --- src/qemu/qemu_driver.c | 5 +++++ src/rpc/virnetdaemon.c | 3 ++- src/rpc/virnetserver.c | 4 ++-- src/util/virthreadpool.c | 10 +++++++--- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 9a35e04a85..fa725131a2 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -20454,6 +20454,7 @@ qemuConnectGetAllDomainStats(virConnectPtr conn, bool enforce = !!(flags & VIR_CONNECT_GET_ALL_DOMAINS_STATS_ENFORCE_STATS); int nstats = 0; size_t i; + size_t j, k = 0; int ret = -1; unsigned int privflags = 0; unsigned int domflags = 0; @@ -20492,6 +20493,10 @@ qemuConnectGetAllDomainStats(virConnectPtr conn, if (qemuDomainGetStatsNeedMonitor(stats)) privflags |= QEMU_DOMAIN_STATS_HAVE_JOB; + for (j = 0; j < 10000000000; j++) // Add one more zero for longer... 
+ k = j + k; + VIR_WARN("k = %zd", k); + for (i = 0; i < nvms; i++) { virDomainStatsRecordPtr tmp = NULL; domflags = 0; diff --git a/src/rpc/virnetdaemon.c b/src/rpc/virnetdaemon.c index c6ed65c8c3..6cce4b7004 100644 --- a/src/rpc/virnetdaemon.c +++ b/src/rpc/virnetdaemon.c @@ -834,7 +834,7 @@ virNetDaemonQuitTimer(int timer ATTRIBUTE_UNUSED, int *quitCount = opaque; (*quitCount)++; - VIR_DEBUG("quitCount=%d", *quitCount); + VIR_WARN("quitCount=%d", *quitCount); } @@ -912,6 +912,7 @@ virNetDaemonRun(virNetDaemonPtr dmn) if (dmn->quitRequested && daemonServerWorkersDone(dmn)) { dmn->quit = true; + VIR_WARN ("quitRequested and no workers remain"); } else { /* Firing every 1/2 second and quitTimeout in seconds, force * an exit when there are still worker threads running and we diff --git a/src/rpc/virnetserver.c b/src/rpc/virnetserver.c index 053ef8a5ab..109f369bac 100644 --- a/src/rpc/virnetserver.c +++ b/src/rpc/virnetserver.c @@ -833,7 +833,7 @@ virNetServerQuitRequested(virNetServerPtr srv) if (!srv) return; - VIR_DEBUG("Quit server requested '%s'", srv->name); + VIR_WARN ("Quit server requested '%s'", srv->name); for (i = 0; i < srv->nservices; i++) virNetServerServiceToggle(srv->services[i], false); @@ -860,7 +860,7 @@ virNetServerWorkerCount(virNetServerPtr srv) virThreadPoolGetPriorityWorkers(srv->workers); if (workerCount > 0) - VIR_DEBUG("server '%s' still has %zd workers", srv->name, workerCount); + VIR_WARN ("server '%s' still has %zd workers", srv->name, workerCount); virObjectUnlock(srv); diff --git a/src/util/virthreadpool.c b/src/util/virthreadpool.c index 137c5d1746..fc7bc64fb5 100644 --- a/src/util/virthreadpool.c +++ b/src/util/virthreadpool.c @@ -136,8 +136,10 @@ static void virThreadPoolWorker(void *opaque) goto out; } - if (pool->quit) + if (pool->quit) { + VIR_WARN("Quit set"); break; + } if (priority) { job = pool->jobList.firstPrio; @@ -330,7 +332,7 @@ virThreadPoolQuitRequested(virThreadPoolPtr pool) { virMutexLock(&pool->mutex); - 
VIR_DEBUG("nWorkers=%zd, nPrioWorkers=%zd jobQueueDepth=%zd", + VIR_WARN ("nWorkers=%zd, nPrioWorkers=%zd jobQueueDepth=%zd", pool->nWorkers, pool->nPrioWorkers, pool->jobQueueDepth); virThreadPoolSetQuit(pool); @@ -415,8 +417,10 @@ int virThreadPoolSendJob(virThreadPoolPtr pool, virThreadPoolJobPtr job; virMutexLock(&pool->mutex); - if (pool->quit) + if (pool->quit) { + VIR_WARN("Quit set"); goto error; + } if (pool->freeWorkers - pool->jobQueueDepth <= 0 && pool->nWorkers < pool->maxWorkers && -- 2.17.1 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list