On Mon, 2014-07-21 at 22:34 -0600, Eric Blake wrote: > We were not directly saving the domain XML to file after starting > or finishing a blockcopy. Without the startup write, a libvirtd > restart in the middle of a copy job would forget that the job was > underway. Then at pivot, we were indirectly writing new XML in > reaction to events that occur as we stop and restart the guest CPUs. > But there was a race: since pivot is an async action, it is possible > that the guest is restarted before the pivot completes, so if XML > changes during the event, that change was not written. The original > blockcopy code cleared out the <mirror> element prior to restarting > the CPUs, but this is also a race, observed if a user does an async > pivot and a dumpxml before the event occurs. Furthermore, this race > will interfere with active commit, because that code relies on the > <mirror> element at the time of the qemu event to determine whether > to inform the user of a normal commit or an active commit. > > Fix things by saving state any time we modify live XML, and by > delaying XML modifications until after the event completes. We > still need a solution for the case of libvirtd restarting in between > requesting qemu to do the pivot and the actual event (that is, if > libvirtd misses the event, but learns the job is complete thanks to > a block query, then the updated state still needs to be updated in > live XML), but that will be a later patch, in part because we want > to start taking advantage of newer qemu's ability to keep the job > around after completion rather than the current usage where the job > disappears both on error and on success. > > * src/qemu/qemu_driver.c (qemuDomainBlockCopy): Track XML change > on disk. > (qemuDomainBlockPivot): Move post-pivot XML rewrites... > * src/qemu/qemu_process.c (qemuProcessHandleBlockJob): ...here, > and expand to cover case of persistent vm. > > Signed-off-by: Eric Blake <eblake@xxxxxxxxxx> > --- > src/qemu/qemu_driver.c | 46 ++++++++++++++++++--------------- > src/qemu/qemu_process.c | 69 +++++++++++++++++++++++++++++++++++++++++++------ > 2 files changed, 86 insertions(+), 29 deletions(-) > > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index 368e112..a5eaead 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -14843,39 +14841,41 @@ qemuDomainBlockPivot(virConnectPtr conn, > virSecurityManagerSetDiskLabel(driver->securityManager, vm->def, disk) < 0)) > goto cleanup; > > - /* Attempt the pivot. */ > + /* Attempt the pivot. We will update the domain later when qemu > + * emits an event, telling us if we succeeded or failed. > + * XXX On libvirtd restarts, if we missed the qemu event, we need > + * to double check what state qemu is in. > + * XXX We should be using qemu's rerror flag to make sure the job > + * remains alive until we know it's final state. > + * XXX If the abort command is synchronous but the qemu event is > + * that the pivot failed, we need to reflect that failure into the > + * overall return value. */ > qemuDomainObjEnterMonitor(driver, vm); > ret = qemuMonitorDrivePivot(priv->mon, device, disk->mirror->path, format); > qemuDomainObjExitMonitor(driver, vm); > > - if (ret == 0) { > - /* XXX We want to revoke security labels and disk lease, as > - * well as audit that revocation, before dropping the original > - * source. But it gets tricky if both source and mirror share > - * common backing files (we want to only revoke the non-shared > - * portion of the chain, and is made more difficult by the > - * fact that we aren't tracking the full chain ourselves; so > - * for now, we leak the access to the original. */ > - virStorageSourceFree(oldsrc); > - oldsrc = NULL; > - } else { > + if (ret < 0) { > /* On failure, qemu abandons the mirror, and reverts back to > * the source disk (RHEL 6.3 has a bug where the revert could > * cause catastrophic failure in qemu, but we don't need to > * worry about it here as it is not an upstream qemu problem. */ > /* XXX should we be parsing the exact qemu error, or calling > * 'query-block', to see what state we really got left in > - * before killing the mirroring job? And just as on the > - * success case, there's security labeling to worry about. */ > + * before killing the mirroring job? > + * XXX We want to revoke security labels and disk lease, as > + * well as audit that revocation, before dropping the original > + * source. But it gets tricky if both source and mirror share > + * common backing files (we want to only revoke the non-shared > + * portion of the chain); so for now, we leak the access to > + * the original. */ > virStorageSourceFree(disk->mirror); > - } > > - disk->mirror = NULL; > - disk->mirroring = false; > - disk->mirrorJob = VIR_DOMAIN_BLOCK_JOB_TYPE_UNKNOWN; > + disk->mirror = NULL; > + disk->mirroring = false; > + disk->mirrorJob = VIR_DOMAIN_BLOCK_JOB_TYPE_UNKNOWN; > + } > > cleanup: > - /* revert to original disk def on failure */ > if (oldsrc) > disk->src = oldsrc; > > @@ -15329,6 +15329,10 @@ qemuDomainBlockCopy(virDomainObjPtr vm, > mirror = NULL; > disk->mirrorJob = VIR_DOMAIN_BLOCK_JOB_TYPE_COPY; > > + if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0) > + VIR_WARN("Unable to save status on vm %s after state change", > + vm->def->name); > + > endjob: > if (need_unlink && unlink(dest)) > VIR_WARN("unable to unlink just-created %s", dest); > diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c > index 65e9faf..3229f81 100644 > --- a/src/qemu/qemu_process.c > +++ b/src/qemu/qemu_process.c > @@ -1016,6 +1016,7 @@ qemuProcessHandleBlockJob(qemuMonitorPtr mon ATTRIBUTE_UNUSED, > virObjectEventPtr event2 = NULL; > const char *path; > virDomainDiskDefPtr disk; > + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); Hello Eric, The cfg should be unref by virObjectUnref(cfg) at the end. Thanks, Chen > > virObjectLock(vm); > disk = qemuProcessFindDomainDiskByAlias(vm, diskAlias); > @@ -1030,15 +1031,67 @@ qemuProcessHandleBlockJob(qemuMonitorPtr mon ATTRIBUTE_UNUSED, > event = virDomainEventBlockJobNewFromObj(vm, path, type, status); > event2 = virDomainEventBlockJob2NewFromObj(vm, disk->dst, type, > status); > - /* XXX If we completed a block pull or commit, then recompute > - * the cached backing chain to match. Better would be storing > - * the chain ourselves rather than reprobing, but we haven't > - * quite completed that conversion to use our XML tracking. */ > - if ((type == VIR_DOMAIN_BLOCK_JOB_TYPE_PULL || > - type == VIR_DOMAIN_BLOCK_JOB_TYPE_COMMIT || > - type == VIR_DOMAIN_BLOCK_JOB_TYPE_ACTIVE_COMMIT) && > - status == VIR_DOMAIN_BLOCK_JOB_COMPLETED) > + > + /* If we completed a block pull or commit, then update the XML > + * to match. */ > + if (status == VIR_DOMAIN_BLOCK_JOB_COMPLETED) { > + if (disk->mirror) { > + virDomainDiskDefPtr persistDisk = NULL; > + > + if (vm->newDef) { > + int indx = virDomainDiskIndexByName(vm->newDef, disk->dst, > + false); > + virStorageSourcePtr copy = NULL; > + > + if (indx >= 0) { > + persistDisk = vm->newDef->disks[indx]; > + copy = virStorageSourceCopy(disk->mirror, false); > + if (virStorageSourceInitChainElement(copy, > + persistDisk->src, > + false) < 0) { > + VIR_WARN("Unable to update persistent definition " > + "on vm %s after block job", > + vm->def->name); > + virStorageSourceFree(copy); > + copy = NULL; > + persistDisk = NULL; > + } > + } > + if (copy) { > + virStorageSourceFree(persistDisk->src); > + persistDisk->src = copy; > + } > + } > + > + /* XXX We want to revoke security labels and disk > + * lease, as well as audit that revocation, before > + * dropping the original source. But it gets tricky > + * if both source and mirror share common backing > + * files (we want to only revoke the non-shared > + * portion of the chain); so for now, we leak the > + * access to the original. */ > + virStorageSourceFree(disk->src); > + disk->src = disk->mirror; > + disk->mirror = NULL; > + disk->mirroring = false; > + disk->mirrorJob = VIR_DOMAIN_BLOCK_JOB_TYPE_UNKNOWN; > + > + if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, > + vm) < 0) > + VIR_WARN("Unable to save status on vm %s after block job", > + vm->def->name); > + if (persistDisk && virDomainSaveConfig(cfg->configDir, > + vm->newDef) < 0) > + VIR_WARN("Unable to update persistent definition on vm %s " > + "after block job", vm->def->name); > + } > + /* Recompute the cached backing chain to match our > + * updates. Better would be storing the chain ourselves > + * rather than reprobing, but we haven't quite completed > + * that conversion to use our XML tracking. */ > qemuDomainDetermineDiskChain(driver, vm, disk, true); > + } > + > if (disk->mirror && > (type == VIR_DOMAIN_BLOCK_JOB_TYPE_COPY || > type == VIR_DOMAIN_BLOCK_JOB_TYPE_ACTIVE_COMMIT)) { -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list