On 04/22/2010 09:19 AM, Stephen Shaw wrote: > On Thu, Apr 22, 2010 at 07:09, Daniel P. Berrange <berrange@xxxxxxxxxx> wrote: >> On Thu, Apr 22, 2010 at 08:54:30AM -0400, Chris Lalancette wrote: >>> On 04/22/2010 08:34 AM, Daniel P. Berrange wrote: >>>> On Wed, Apr 21, 2010 at 05:16:21PM -0400, Chris Lalancette wrote: >>>>> On 04/21/2010 04:34 PM, Stephen Shaw wrote: >>>>>> I'm getting a seg fault when running virsh snapshot-create 1, but only >>>>>> when virt-manager is open and connected. >>>>>> >>>>>> Here is some of the debug info I was able to come up with - >>>>>> http://fpaste.org/9GO6/ (bt) >>>>>> http://fpaste.org/7gkH/ ('thread apply all bt) >>>>>> >>>>>> * After the crash >>>>>> (gdb) p mon->msg >>>>>> $1 = (qemuMonitorMessagePtr) 0x0 >>>>>> >>>>>> >>>>>> nibbler:~ # libvirtd --version >>>>>> libvirtd (libvirt) 0.8.0 >>>>>> >>>>>> >>>>>> Please let me know if there is any other information you need. >>>>>> Stephen >>>>> >>>>> Thanks for the report. To be perfectly honest, I can't see how what >>>>> happened could happen :). But I'll take a closer look at it and see >>>>> if I can reproduce and see what is going on with it. >>>> >>>> I see thread locking problems in the code >>>> >>>> - qemuDomainSnapshotCreateXML() is calling monitor commands, but has >>>> not run qemuDomainObjBeginJobWithDriver() to ensure exclusive >>>> access to the monitor >>>> >>>> - qemuDomainSnapshotDiscard has same problem >>> >>> Yep, just fixing those now. I didn't quite understand the ObjBeginJob >>> before, but I think I'm understanding it now. This is probably the source of >>> the problems. >> >> There's some notes about the rules in src/qemu/THREADS.txt. >> >> You must acquire locks on objects in the following order, not missing >> any steps. >> >> qemud_driver (qemudDriverLock) >> virDomainObjPtr (implicit via virDomainObjFindByXXX) >> qemuMonitorPrivatePtr (qemuDomainObjBeginJob) >> qemuMonitorPtr (implicit via qemuDomainObjEnterMonitor) >> >> >> Note that qemuDomainObjEnterMonitor() will release the locks on the >> qemud_driver & virDomainObjPtr objects, once it has acquired the lock >> on qemuMonitorPtr. The qemuMonitorPrivatePtr object has a condition >> variable that ensures continued mutual exclusion, even though the >> qemud_driver, virDomainObjPtr object are now unlocked. >> >> So by missing qemuDomainObjBeginJob(), the condition variable was not >> acquired, and mutual exclusion was not assured >> >> Regards, >> Daniel > > Thanks for taking care of this. As soon as the fix is checked in I'll > rebuild libvirt and continue testing it. I still have to go over the rules more completely, but the attached patch seems to do more or less the right thing for me. If you could give it a quick test, that would be great. Thanks, -- Chris Lalancette
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 5f4adfd..4ec4bd6 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -10754,14 +10754,20 @@ static virDomainSnapshotPtr qemuDomainSnapshotCreateXML(virDomainPtr domain, } } else { + if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) + goto cleanup; + priv = vm->privateData; qemuDomainObjEnterMonitorWithDriver(driver, vm); if (qemuMonitorCreateSnapshot(priv->mon, def->name) < 0) { qemuDomainObjExitMonitorWithDriver(driver, vm); + if (qemuDomainObjEndJob(vm) == 0) + vm = NULL; goto cleanup; } qemuDomainObjExitMonitorWithDriver(driver, vm); - + if (qemuDomainObjEndJob(vm) == 0) + vm = NULL; } snap->def->state = vm->state; @@ -11034,18 +11040,18 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, rc = qemuMonitorLoadSnapshot(priv->mon, snap->def->name); qemuDomainObjExitMonitorWithDriver(driver, vm); if (rc < 0) - goto cleanup; + goto endjob; } else { if (qemuDomainSnapshotSetActive(vm, driver->snapshotDir) < 0) - goto cleanup; + goto endjob; rc = qemudStartVMDaemon(snapshot->domain->conn, driver, vm, NULL, -1); if (qemuDomainSnapshotSetInactive(vm, driver->snapshotDir) < 0) - goto cleanup; + goto endjob; if (rc < 0) - goto cleanup; + goto endjob; } if (snap->def->state == VIR_DOMAIN_PAUSED) { @@ -11057,7 +11063,7 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, rc = qemuMonitorStopCPUs(priv->mon); qemuDomainObjExitMonitorWithDriver(driver, vm); if (rc < 0) - goto cleanup; + goto endjob; } event = virDomainEventNewFromObj(vm, @@ -11083,17 +11089,18 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, } if (qemuDomainSnapshotSetActive(vm, driver->snapshotDir) < 0) - goto cleanup; + goto endjob; } vm->state = snap->def->state; ret = 0; -cleanup: - if (vm && qemuDomainObjEndJob(vm) == 0) +endjob: + if (qemuDomainObjEndJob(vm) == 0) vm = NULL; +cleanup: if (event) qemuDomainEventQueue(driver, event); if (vm) @@ -11247,6 +11254,9 @@ static int qemuDomainSnapshotDelete(virDomainSnapshotPtr snapshot, goto cleanup; } + if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) + goto cleanup; + if (flags & VIR_DOMAIN_SNAPSHOT_DELETE_CHILDREN) { rem.driver = driver; rem.vm = vm; @@ -11255,11 +11265,15 @@ static int qemuDomainSnapshotDelete(virDomainSnapshotPtr snapshot, virHashForEach(vm->snapshots.objs, qemuDomainSnapshotDiscardChildren, &rem); if (rem.err < 0) - goto cleanup; + goto endjob; } ret = qemuDomainSnapshotDiscard(driver, vm, snap); +endjob: + if (qemuDomainObjEndJob(vm) == 0) + vm = NULL; + cleanup: if (vm) virDomainObjUnlock(vm);
-- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list