This change gives me a segfault at startup when running libvirtd from
my working directory; see the backtrace below, and a note with a
possible fix after the quoted patch. Reverting these two patches
solves the issue.

Thread 17 "lt-libvirtd" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fffc0dff700 (LWP 4338)]
virDomainObjIsActive (dom=0x7fff9c1a6160) at ../../src/conf/domain_conf.h:2800
2800        return dom->def->id != -1;
(gdb) bt
#0  virDomainObjIsActive (dom=0x7fff9c1b2ec0) at ../../src/conf/domain_conf.h:2800
#1  0x00007fffdb706eb1 in qemuMonitorOpen (vm=0x7fff9c1b2ec0, config=0x7fffd8dfe520, retry=true, timeout=30, cb=0x7fffdb7fdd80 <callbacks>, opaque=0x0) at ../../src/qemu/qemu_monitor.c:848
#2  0x00007fffdb6f0812 in qemuProcessQMPConnectMonitor (proc=0x7fff9c19f7e0) at ../../src/qemu/qemu_process.c:8662
#3  0x00007fffdb6f091e in qemuProcessQMPStart (proc=0x7fff9c19f7e0) at ../../src/qemu/qemu_process.c:8707
#4  0x00007fffdb66bff7 in virQEMUCapsInitQMPSingle (qemuCaps=0x7fff9c19f260, libDir=0x7fff9c130b40 "/var/lib/libvirt/qemu", runUid=107, runGid=107, onlyTCG=false) at ../../src/qemu/qemu_capabilities.c:4663
#5  0x00007fffdb66c096 in virQEMUCapsInitQMP (qemuCaps=0x7fff9c19f260, libDir=0x7fff9c130b40 "/var/lib/libvirt/qemu", runUid=107, runGid=107) at ../../src/qemu/qemu_capabilities.c:4686
#6  0x00007fffdb66c285 in virQEMUCapsNewForBinaryInternal (hostArch=VIR_ARCH_X86_64, binary=0x7fff9c1ac160 "/usr/bin/qemu-system-i386", libDir=0x7fff9c130b40 "/var/lib/libvirt/qemu", runUid=107, runGid=107, microcodeVersion=180, kernelVersion=0x7fff9c14aae0 "5.2.7-200.fc30.x86_64 #1 SMP Thu Aug 8 05:35:29 UTC 2019") at ../../src/qemu/qemu_capabilities.c:4739
#7  0x00007fffdb66c3df in virQEMUCapsNewData (binary=0x7fff9c1ac160 "/usr/bin/qemu-system-i386", privData=0x7fff9c14af90) at ../../src/qemu/qemu_capabilities.c:4772
#8  0x00007ffff7c0a803 in virFileCacheNewData (cache=0x7fff9c14ade0, name=0x7fff9c1ac160 "/usr/bin/qemu-system-i386") at ../../src/util/virfilecache.c:208
#9  0x00007ffff7c0aa9e in virFileCacheValidate (cache=0x7fff9c14ade0, name=0x7fff9c1ac160 "/usr/bin/qemu-system-i386", data=0x7fffd8dfe820) at ../../src/util/virfilecache.c:279
#10 0x00007ffff7c0aba3 in virFileCacheLookup (cache=0x7fff9c14ade0, name=0x7fff9c1ac160 "/usr/bin/qemu-system-i386") at ../../src/util/virfilecache.c:312
#11 0x00007fffdb66c79c in virQEMUCapsCacheLookup (cache=0x7fff9c14ade0, binary=0x7fff9c1ac160 "/usr/bin/qemu-system-i386") at ../../src/qemu/qemu_capabilities.c:4910
#12 0x00007fffdb663e08 in virQEMUCapsInitGuest (caps=0x7fff9c195bd0, cache=0x7fff9c14ade0, hostarch=VIR_ARCH_X86_64, guestarch=VIR_ARCH_I686) at ../../src/qemu/qemu_capabilities.c:802
#13 0x00007fffdb664384 in virQEMUCapsInit (cache=0x7fff9c14ade0) at ../../src/qemu/qemu_capabilities.c:958
#14 0x00007fffdb6d8ad1 in virQEMUDriverCreateCapabilities (driver=0x7fff9c117590) at ../../src/qemu/qemu_conf.c:1254
#15 0x00007fffdb72f347 in qemuStateInitialize (privileged=true, callback=0x5555555781b4 <daemonInhibitCallback>, opaque=0x555555617dd0) at ../../src/qemu/qemu_driver.c:937
#16 0x00007ffff7e1b844 in virStateInitialize (privileged=true, mandatory=false, callback=0x5555555781b4 <daemonInhibitCallback>, opaque=0x555555617dd0) at ../../src/libvirt.c:666
#17 0x0000555555578490 in daemonRunStateInit (opaque=0x555555617dd0) at ../../src/remote/remote_daemon.c:846
#18 0x00007ffff7bf8c1b in virThreadHelper (data=0x5555556327d0) at ../../src/util/virthread.c:196
#19 0x00007ffff71f84c0 in start_thread () from /lib64/libpthread.so.0
#20 0x00007ffff7126553 in clone () from /lib64/libc.so.6
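The capability-probing path in frames #2-#3 explains the crash:
qemuProcessQMPStart() probes each qemu binary using a vm object that
never gets a definition, so the virDomainObjIsActive() check that this
series adds to qemuMonitorOpen() reads dom->def->id through a NULL
def. For reference, this is the helper the crash happens in (the body
is what gdb shows above at domain_conf.h:2800; the surrounding
signature is from memory):

    /* ../../src/conf/domain_conf.h:2800 */
    static inline bool
    virDomainObjIsActive(virDomainObjPtr dom)
    {
        /* crashes when dom->def == NULL, as for the QMP probe vm */
        return dom->def->id != -1;
    }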
On Tue, 2019-10-08 at 17:15 -0300, Daniel Henrique Barboza wrote:
> 
> On 10/8/19 6:15 AM, Michal Privoznik wrote:
> > When connecting to qemu's monitor the @vm object is unlocked.
> > This is justified - connecting may take a long time and we don't
> > want to wait with the domain object locked. However, just before
> > the domain object is locked again, the monitor's FD is registered
> > in the event loop. Therefore, there is a small window where the
> > event loop has a chance to call a handler for an event that
> > occurred on the monitor FD but vm is not initialized properly just
> > yet (i.e. priv->mon is not set). For instance, if there's an
> > incoming migration and qemu creates its socket but then fails to
> > initialize (for various reasons; I'm reproducing this by using
> > hugepages but leaving the HP pool empty), then the following may
> > happen:
> > 
> > 1) qemuConnectMonitor() unlocks @vm
> > 
> > 2) qemuMonitorOpen() connects to the monitor socket and, by
> > calling qemuMonitorOpenInternal(), which subsequently calls
> > qemuMonitorRegister(), the event handler is installed
> > 
> > 3) qemu fails to initialize and exit()-s, which closes the
> > monitor
> > 
> > 4) The event loop sees EOF on the monitor and control gets to
> > qemuProcessEventHandler(), which locks @vm and calls
> > processMonitorEOFEvent(), which then calls
> > qemuMonitorLastError(priv->mon). But priv->mon is not set just
> > yet.
> > 
> > 5) qemuMonitorLastError() dereferences a NULL pointer
> > 
> > The solution is to unlock the domain object for a shorter time
> > and, most importantly, to register the event handler with the
> > domain object locked, so that any possible event processing is
> > done only after @vm's private data has been properly initialized.
> > 
> > This issue is also mentioned in v4.2.0-99-ga5a777a8ba.
> > 
> > Since we are unlocking @vm and locking it back, another thread
> > might have destroyed the domain meanwhile. Therefore we have to
> > check if the domain is still activem, and we have to do it at the
> 
> s/activem/active
> 
> > same place where the domain lock is acquired back, i.e. in
> > qemuMonitorOpen(). This creates a small problem for our test
> > suite, which calls qemuMonitorOpen() directly and passes a @vm
> > that has no definition. This makes the virDomainObjIsActive()
> > call crash. Fortunately, allocating an empty domain definition
> > is sufficient.
> > 
> > Signed-off-by: Michal Privoznik <mprivozn@xxxxxxxxxx>
> > ---
> 
> LGTM. Hopefully this will be enough to prevent this vm lock race
> with the monitor at VM startup.
> 
> Reviewed-by: Daniel Henrique Barboza <danielhb413@xxxxxxxxx>
> 
> > This is an alternative approach to:
> > 
> > https://www.redhat.com/archives/libvir-list/2019-September/msg00749.html
> > 
> >  src/qemu/qemu_monitor.c      | 33 +++++++++++++++++++++++++--------
> >  src/qemu/qemu_process.c      | 12 ------------
> >  tests/qemumonitortestutils.c |  2 ++
> >  3 files changed, 27 insertions(+), 20 deletions(-)
> > 
> > diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> > index 58de26a276..a53d3b1d60 100644
> > --- a/src/qemu/qemu_monitor.c
> > +++ b/src/qemu/qemu_monitor.c
> > @@ -810,35 +810,52 @@ qemuMonitorOpen(virDomainObjPtr vm,
> >                  qemuMonitorCallbacksPtr cb,
> >                  void *opaque)
> >  {
> > -    int fd;
> > +    int fd = -1;
> >      bool hasSendFD = false;
> > -    qemuMonitorPtr ret;
> > +    qemuMonitorPtr ret = NULL;
> >  
> >      timeout += QEMU_DEFAULT_MONITOR_WAIT;
> >  
> > +    /* Hold an extra reference because we can't allow 'vm' to be
> > +     * deleted until the monitor gets its own reference. */
> > +    virObjectRef(vm);
> > +
> >      switch (config->type) {
> >      case VIR_DOMAIN_CHR_TYPE_UNIX:
> >          hasSendFD = true;
> > -        if ((fd = qemuMonitorOpenUnix(config->data.nix.path,
> > -                                      vm->pid, retry, timeout)) < 0)
> > -            return NULL;
> > +        virObjectUnlock(vm);
> > +        fd = qemuMonitorOpenUnix(config->data.nix.path,
> > +                                 vm->pid, retry, timeout);
> > +        virObjectLock(vm);
> >          break;
> >  
> >      case VIR_DOMAIN_CHR_TYPE_PTY:
> > -        if ((fd = qemuMonitorOpenPty(config->data.file.path)) < 0)
> > -            return NULL;
> > +        virObjectUnlock(vm);
> > +        fd = qemuMonitorOpenPty(config->data.file.path);
> > +        virObjectLock(vm);
> >          break;
> >  
> >      default:
> >          virReportError(VIR_ERR_INTERNAL_ERROR,
> >                         _("unable to handle monitor type: %s"),
> >                         virDomainChrTypeToString(config->type));
> > -        return NULL;
> > +        break;
> > +    }
> > +
> > +    if (fd < 0)
> > +        goto cleanup;
> > +
> > +    if (!virDomainObjIsActive(vm)) {
> > +        virReportError(VIR_ERR_OPERATION_FAILED, "%s",
> > +                       _("domain is not running"));
> > +        goto cleanup;
> >      }
> >  
> >      ret = qemuMonitorOpenInternal(vm, fd, hasSendFD, cb, opaque);
> > + cleanup:
> >      if (!ret)
> >          VIR_FORCE_CLOSE(fd);
> > +    virObjectUnref(vm);
> >      return ret;
> >  }
> > 
> > diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> > index a50cd54393..b303e5ba05 100644
> > --- a/src/qemu/qemu_process.c
> > +++ b/src/qemu/qemu_process.c
> > @@ -1955,13 +1955,8 @@ qemuConnectMonitor(virQEMUDriverPtr driver, virDomainObjPtr vm, int asyncJob,
> >       * 1GiB of guest RAM. */
> >      timeout = vm->def->mem.total_memory / (1024 * 1024);
> >  
> > -    /* Hold an extra reference because we can't allow 'vm' to be
> > -     * deleted until the monitor gets its own reference. */
> > -    virObjectRef(vm);
> > -
> >      ignore_value(virTimeMillisNow(&priv->monStart));
> >      monConfig = virObjectRef(priv->monConfig);
> > -    virObjectUnlock(vm);
> >  
> >      mon = qemuMonitorOpen(vm,
> >                            monConfig,
> > @@ -1978,15 +1973,8 @@ qemuConnectMonitor(virQEMUDriverPtr driver, virDomainObjPtr vm, int asyncJob,
> >                            qemuProcessMonitorLogFree);
> >      }
> >  
> > -    virObjectLock(vm);
> >      virObjectUnref(monConfig);
> > -    virObjectUnref(vm);
> >      priv->monStart = 0;
> > -
> > -    if (!virDomainObjIsActive(vm)) {
> > -        qemuMonitorClose(mon);
> > -        mon = NULL;
> > -    }
> >      priv->mon = mon;
> >  
> >      if (qemuSecurityClearSocketLabel(driver->securityManager, vm->def) < 0) {
> > 
> > diff --git a/tests/qemumonitortestutils.c b/tests/qemumonitortestutils.c
> > index e9dff123f8..c7580c5f28 100644
> > --- a/tests/qemumonitortestutils.c
> > +++ b/tests/qemumonitortestutils.c
> > @@ -1085,6 +1085,8 @@ qemuMonitorCommonTestNew(virDomainXMLOptionPtr xmlopt,
> >          test->vm = virDomainObjNew(xmlopt);
> >          if (!test->vm)
> >              goto error;
> > +        if (!(test->vm->def = virDomainDefNew()))
> > +            goto error;
> >      }
> >  
> >      if (virNetSocketNewListenUNIX(path, 0700, geteuid(), getegid(),
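So the QMP capability-probing code presumably needs the same treatment
as the test suite: its vm object is also created without a definition.
A minimal sketch of the kind of change I would expect, mirroring the
qemumonitortestutils.c hunk above -- note that the 'proc->vm' field
name and the placement (somewhere in the qemuProcessQMP setup, before
qemuMonitorOpen() is reached) are my guesses, not taken from this
patch:

    /* Hypothetical: give the probing vm an empty definition so that
     * virDomainObjIsActive() has a def to dereference. */
    if (!proc->vm->def && !(proc->vm->def = virDomainDefNew()))
        return -1;

I haven't checked whether the probe path needs more than that (e.g. a
def->id that makes the new "domain is not running" check pass), but
the immediate segfault is just the missing definition.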