On Wed, Apr 17, 2019 at 09:09:16 -0500, Eric Blake wrote: > A lot of this work heavily copies from the existing snapshot > APIs. The interaction with qemu during create/delete still > needs to be implemented, but this takes care of all the libvirt > metadata (saving and restoring XML, and tracking the relations > between multiple checkpoints). I'd prefer at least the 'qemu_conf' related stuff to be separate. > > Signed-off-by: Eric Blake <eblake@xxxxxxxxxx> > --- > src/qemu/qemu_block.h | 3 + > src/qemu/qemu_conf.h | 2 + > src/qemu/qemu_domain.h | 15 + > src/qemu/qemu_block.c | 12 + > src/qemu/qemu_conf.c | 5 + > src/qemu/qemu_domain.c | 133 +++++++ > src/qemu/qemu_driver.c | 843 +++++++++++++++++++++++++++++++++++++++++ > 7 files changed, 1013 insertions(+) [...] > diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c > index 7d9f7ec3ab..b496b08afa 100644 > --- a/src/qemu/qemu_block.c > +++ b/src/qemu/qemu_block.c > @@ -1647,3 +1647,15 @@ qemuBlockStorageGetCopyOnReadProps(virDomainDiskDefPtr disk) > > return ret; > } > + > +const char * > +qemuBlockNodeLookup(virDomainObjPtr vm, const char *disk) ...DiskNodeFormatLookup > +{ > + size_t i; > + > + for (i = 0; i < vm->def->ndisks; i++) { > + if (STREQ(vm->def->disks[i]->dst, disk)) > + return vm->def->disks[i]->src->nodeformat; > + } > + return NULL; > +} > diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c > index daea11dacb..1312e753db 100644 > --- a/src/qemu/qemu_conf.c > +++ b/src/qemu/qemu_conf.c [...] > @@ -242,6 +244,8 @@ virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged) > goto error; > if (virAsprintf(&cfg->snapshotDir, "%s/qemu/snapshot", cfg->configBaseDir) < 0) > goto error; > + if (virAsprintf(&cfg->checkpointDir, "%s/qemu/checkpoint", cfg->configBaseDir) < 0) > + goto error; This directory will need to be versionable as we need to record alternate histories when creating snapshots. I'm not sure how to achieve thant though. We'll probably need to add a directory into 'snapshot' folder for each snapshot which will copy the whole checkpoint tree. > if (virAsprintf(&cfg->autoDumpPath, "%s/qemu/dump", cfg->configBaseDir) < 0) > goto error; > if (virAsprintf(&cfg->channelTargetDir, [...] > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > index db67fe2193..739074e17d 100644 > --- a/src/qemu/qemu_domain.c > +++ b/src/qemu/qemu_domain.c [...] > @@ -8479,6 +8480,40 @@ qemuDomainSnapshotWriteMetadata(virDomainObjPtr vm, > return ret; > } > > +int > +qemuDomainCheckpointWriteMetadata(virDomainObjPtr vm, > + virDomainMomentObjPtr checkpoint, > + virCapsPtr caps, > + virDomainXMLOptionPtr xmlopt, > + const char *checkpointDir) > +{ > + unsigned int flags = VIR_DOMAIN_CHECKPOINT_FORMAT_SECURE | > + VIR_DOMAIN_CHECKPOINT_FORMAT_INTERNAL; > + virDomainCheckpointDefPtr def = virDomainCheckpointObjGetDef(checkpoint); > + VIR_AUTOFREE(char *) newxml = NULL; > + VIR_AUTOFREE(char *) chkDir = NULL; > + VIR_AUTOFREE(char *) chkFile = NULL; > + > + if (virDomainCheckpointGetCurrent(vm->checkpoints) == checkpoint) > + flags |= VIR_DOMAIN_CHECKPOINT_FORMAT_CURRENT; > + newxml = virDomainCheckpointDefFormat(def, caps, xmlopt, flags); > + if (newxml == NULL) > + return -1; > + > + if (virAsprintf(&chkDir, "%s/%s", checkpointDir, vm->def->name) < 0) The path seems backwards, but that's an old mistake. > + return -1; > + if (virFileMakePath(chkDir) < 0) { > + virReportSystemError(errno, _("cannot create checkpoint directory '%s'"), > + chkDir); > + return -1; > + } [...] > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index c443c881d5..144cb51d89 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c [...] > @@ -222,6 +223,40 @@ qemuSnapObjFromSnapshot(virDomainObjPtr vm, > return qemuSnapObjFromName(vm, snapshot->name); > } > > +/* Looks up the domain object from checkpoint and unlocks the > + * driver. The returned domain object is locked and ref'd and the > + * caller must call virDomainObjEndAPI() on it. */ > +static virDomainObjPtr > +qemuDomObjFromCheckpoint(virDomainCheckpointPtr checkpoint) > +{ > + return qemuDomObjFromDomain(checkpoint->domain); > +} > + > + > +/* Looks up checkpoint object from VM and name */ > +static virDomainMomentObjPtr > +qemuCheckObjFromName(virDomainObjPtr vm, This should be called qemuCheckpointObjFromName. Otherwise it implies a verb. Also all other functions have the full 'Checkpoint' > + const char *name) > +{ > + virDomainMomentObjPtr chk = NULL; > + chk = virDomainCheckpointFindByName(vm->checkpoints, name); > + if (!chk) > + virReportError(VIR_ERR_NO_DOMAIN_CHECKPOINT, > + _("no domain checkpoint with matching name '%s'"), > + name); > + > + return chk; > +} > + > + > +/* Looks up checkpoint object from VM and checkpointPtr */ > +static virDomainMomentObjPtr > +qemuCheckObjFromCheckpoint(virDomainObjPtr vm, > + virDomainCheckpointPtr checkpoint) This too. > +{ > + return qemuCheckObjFromName(vm, checkpoint->name); > +} > + > static int > qemuAutostartDomain(virDomainObjPtr vm, > void *opaque) > @@ -525,6 +560,124 @@ qemuDomainSnapshotLoad(virDomainObjPtr vm, > } > > > +static int > +qemuDomainCheckpointLoad(virDomainObjPtr vm, > + void *data) > +{ > + char *baseDir = (char *)data; > + char *chkDir = NULL; > + DIR *dir = NULL; > + struct dirent *entry; > + char *xmlStr; > + char *fullpath; > + virDomainCheckpointDefPtr def = NULL; > + virDomainMomentObjPtr chk = NULL; > + virDomainMomentObjPtr current = NULL; > + bool cur; > + unsigned int flags = (VIR_DOMAIN_CHECKPOINT_PARSE_REDEFINE | > + VIR_DOMAIN_CHECKPOINT_PARSE_INTERNAL); > + int ret = -1; > + virCapsPtr caps = NULL; > + int direrr; > + > + virObjectLock(vm); > + if (virAsprintf(&chkDir, "%s/%s", baseDir, vm->def->name) < 0) { > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("Failed to allocate memory for " > + "checkpoint directory for domain %s"), > + vm->def->name); > + goto cleanup; > + } > + > + if (!(caps = virQEMUDriverGetCapabilities(qemu_driver, false))) > + goto cleanup; > + > + VIR_INFO("Scanning for checkpoints for domain %s in %s", vm->def->name, > + chkDir); > + > + if (virDirOpenIfExists(&dir, chkDir) <= 0) > + goto cleanup; > + > + while ((direrr = virDirRead(dir, &entry, NULL)) > 0) { > + /* NB: ignoring errors, so one malformed config doesn't > + kill the whole process */ > + VIR_INFO("Loading checkpoint file '%s'", entry->d_name); > + > + if (virAsprintf(&fullpath, "%s/%s", chkDir, entry->d_name) < 0) { > + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", virAsprintf reports it's own error. > + _("Failed to allocate memory for path")); > + continue; > + } > + > + if (virFileReadAll(fullpath, 1024*1024*1, &xmlStr) < 0) { > + /* Nothing we can do here, skip this one */ > + virReportSystemError(errno, This too. > + _("Failed to read checkpoint file %s"), > + fullpath); > + VIR_FREE(fullpath); > + continue; > + } > + > + def = virDomainCheckpointDefParseString(xmlStr, caps, > + qemu_driver->xmlopt, &cur, > + flags); > + if (!def || virDomainCheckpointAlignDisks(def) < 0) { > + /* Nothing we can do here, skip this one */ > + virReportError(VIR_ERR_INTERNAL_ERROR, Also this one. > + _("Failed to parse checkpoint XML from file '%s'"), > + fullpath); > + VIR_FREE(fullpath); > + VIR_FREE(xmlStr); > + continue; > + } > + > + chk = virDomainCheckpointAssignDef(vm->checkpoints, def); > + if (chk == NULL) { > + virDomainCheckpointDefFree(def); > + } else if (cur) { > + if (current) > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("Too many snapshots claiming to be current for domain %s"), Stale error message. > + vm->def->name); > + current = chk; > + } > + > + VIR_FREE(fullpath); > + VIR_FREE(xmlStr); > + } > + if (direrr < 0) > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("Failed to fully read directory %s"), > + chkDir); > + > + virDomainCheckpointSetCurrent(vm->checkpoints, current); > + > + if (virDomainCheckpointUpdateRelations(vm->checkpoints) < 0) > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("Checkpoints have inconsistent relations for domain %s"), > + vm->def->name); > + > + /* FIXME: qemu keeps internal track of bitmaps, which form the > + * basis for checkpoints; it would be nice if we could update our > + * internal state to reflect that information automatically. But > + * qemu 3.0 did not have access to this via qemu-img for offline The capabilities which are used for this are present in 4.0 aren't they? > + * images (you have to use QMP commands on a running guest), and > + * it also does not track <parent> relations which we find > + * important in our metadata. > + */ > + > + virResetLastError(); > + [...] > @@ -16793,6 +16963,651 @@ qemuDomainSnapshotDelete(virDomainSnapshotPtr snapshot, [...] > +/* Called inside job lock */ > +static int > +qemuDomainCheckpointPrepare(virQEMUDriverPtr driver, virCapsPtr caps, > + virDomainObjPtr vm, > + virDomainCheckpointDefPtr def) > +{ > + int ret = -1; > + size_t i; > + char *xml = NULL; > + qemuDomainObjPrivatePtr priv = vm->privateData; > + > + /* Easiest way to clone inactive portion of vm->def is via > + * conversion in and back out of xml. */ > + if (!(xml = qemuDomainDefFormatLive(driver, vm->def, priv->origCPU, > + true, true)) || > + !(def->common.dom = virDomainDefParseString(xml, caps, driver->xmlopt, NULL, > + VIR_DOMAIN_DEF_PARSE_INACTIVE | > + VIR_DOMAIN_DEF_PARSE_SKIP_VALIDATE))) > + goto cleanup; > + > + if (virDomainCheckpointAlignDisks(def) < 0 || > + qemuBlockNodeNamesDetect(driver, vm, QEMU_ASYNC_JOB_NONE) < 0) This is always done when starting the qemu instance and on every update, thus should not be necessary. Also note that if necessary it must be guarded by if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV) && With blockdev we shouldn't ever do that. > + goto cleanup; > + > + for (i = 0; i < def->ndisks; i++) { > + virDomainCheckpointDiskDefPtr disk = &def->disks[i]; > + > + if (disk->type != VIR_DOMAIN_CHECKPOINT_TYPE_BITMAP) > + continue; > + > + if (vm->def->disks[i]->src->format > 0 && > + vm->def->disks[i]->src->format != VIR_STORAGE_FILE_QCOW2) { > + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, > + _("checkpoint for disk %s unsupported " > + "for storage type %s"), I'm not a fan of wrapping strings. > + disk->name, > + virStorageFileFormatTypeToString( > + vm->def->disks[i]->src->format)); And this doesn't help readability either. > + goto cleanup; > + } > + } > + > + ret = 0; > + > + cleanup: > + VIR_FREE(xml); > + return ret; > +} [...] > +static virDomainCheckpointPtr > +qemuDomainCheckpointCreateXML(virDomainPtr domain, > + const char *xmlDesc, > + unsigned int flags) > +{ > + virQEMUDriverPtr driver = domain->conn->privateData; > + virDomainObjPtr vm = NULL; > + char *xml = NULL; > + virDomainMomentObjPtr chk = NULL; > + virDomainCheckpointPtr checkpoint = NULL; > + virDomainCheckpointDefPtr def = NULL; > + virDomainMomentObjPtr current = NULL; > + bool update_current = true; > + bool redefine = flags & VIR_DOMAIN_CHECKPOINT_CREATE_REDEFINE; > + unsigned int parse_flags = 0; > + virDomainMomentObjPtr other = NULL; > + virQEMUDriverConfigPtr cfg = NULL; > + virCapsPtr caps = NULL; > + > + virCheckFlags(VIR_DOMAIN_CHECKPOINT_CREATE_REDEFINE | > + VIR_DOMAIN_CHECKPOINT_CREATE_CURRENT | > + VIR_DOMAIN_CHECKPOINT_CREATE_NO_METADATA, NULL); > + /* TODO: VIR_DOMAIN_CHECKPOINT_CREATE_QUIESCE */ > + > + if (redefine) > + parse_flags |= VIR_DOMAIN_CHECKPOINT_PARSE_REDEFINE; > + if ((redefine && !(flags & VIR_DOMAIN_CHECKPOINT_CREATE_CURRENT)) || > + (flags & VIR_DOMAIN_CHECKPOINT_CREATE_NO_METADATA)) > + update_current = false; > + > + if (!(vm = qemuDomObjFromDomain(domain))) > + goto cleanup; > + > + cfg = virQEMUDriverGetConfig(driver); > + > + if (virDomainCheckpointCreateXMLEnsureACL(domain->conn, vm->def, flags) < 0) > + goto cleanup; > + > + if (!(caps = virQEMUDriverGetCapabilities(driver, false))) > + goto cleanup; > + > + if (qemuProcessAutoDestroyActive(driver, vm)) { > + virReportError(VIR_ERR_OPERATION_INVALID, > + "%s", _("domain is marked for auto destroy")); > + goto cleanup; > + } > + > + if (!virDomainObjIsActive(vm)) { > + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", > + _("cannot create checkpoint for inactive domain")); > + goto cleanup; > + } > + > + if (!(def = qemuDomainCheckpointDefParseString(driver, caps, xmlDesc, > + parse_flags))) > + goto cleanup; > + > + /* We are going to modify the domain below. */ > + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) > + goto cleanup; > + > + if (redefine) { > + if (virDomainCheckpointRedefinePrep(domain, vm, &def, &chk, > + driver->xmlopt, > + &update_current) < 0) > + goto endjob; > + } else if (qemuDomainCheckpointPrepare(driver, caps, vm, def) < 0) { > + goto endjob; > + } > + > + if (!chk) { > + if (!(chk = virDomainCheckpointAssignDef(vm->checkpoints, def))) > + goto endjob; > + > + def = NULL; > + } > + > + current = virDomainCheckpointGetCurrent(vm->checkpoints); > + if (current) { > + if (!redefine && > + VIR_STRDUP(chk->def->parent, current->def->name) < 0) > + goto endjob; > + if (update_current) { > + virDomainCheckpointSetCurrent(vm->checkpoints, NULL); > + if (qemuDomainCheckpointWriteMetadata(vm, current, > + driver->caps, driver->xmlopt, > + cfg->checkpointDir) < 0) > + goto endjob; > + } > + } > + > + /* actually do the checkpoint */ > + if (redefine) { > + /* XXX Should we validate that the redefined checkpoint even > + * makes sense, such as checking that qemu-img recognizes the > + * checkpoint bitmap name in at least one of the domain's disks? */ > + } else { > + /* TODO: issue QMP transaction command */ > + } > + > + /* If we fail after this point, there's not a whole lot we can do; > + * we've successfully created the checkpoint, so we have to go > + * forward the best we can. > + */ This is not as black and white as with snapshots. After a snapshot was created there's not that much we can do, because it would imply needing to commit the new filles into the backing etc. With checkpoints there's no data loss. > + checkpoint = virGetDomainCheckpoint(domain, chk->def->name); > + > + endjob: > + if (checkpoint && !(flags & VIR_DOMAIN_CHECKPOINT_CREATE_NO_METADATA)) { > + if (update_current) > + virDomainCheckpointSetCurrent(vm->checkpoints, chk); > + if (qemuDomainCheckpointWriteMetadata(vm, chk, driver->caps, > + driver->xmlopt, > + cfg->checkpointDir) < 0) { > + /* if writing of metadata fails, error out rather than trying > + * to silently carry on without completing the checkpoint */ > + virObjectUnref(checkpoint); > + checkpoint = NULL; > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("unable to save metadata for checkpoint %s"), > + chk->def->name); > + virDomainCheckpointObjListRemove(vm->checkpoints, chk); > + } else { > + other = virDomainCheckpointFindByName(vm->checkpoints, > + chk->def->parent); > + virDomainMomentSetParent(chk, other); > + } > + } else if (chk) { > + virDomainCheckpointObjListRemove(vm->checkpoints, chk); > + } > + > + qemuDomainObjEndJob(driver, vm); > + > + cleanup: > + virDomainObjEndAPI(&vm); > + virDomainCheckpointDefFree(def); > + VIR_FREE(xml); > + virObjectUnref(caps); > + virObjectUnref(cfg); > + return checkpoint; > +} > + > + [...] > +static int > +qemuDomainHasCurrentCheckpoint(virDomainPtr domain, > + unsigned int flags) This API seems a bit pointless if you can call qemuDomainCheckpointCurrent. > +{ > + virDomainObjPtr vm; > + int ret = -1; > + > + virCheckFlags(0, -1); > + > + if (!(vm = qemuDomObjFromDomain(domain))) > + return -1; > + > + if (virDomainHasCurrentCheckpointEnsureACL(domain->conn, vm->def) < 0) > + goto cleanup; > + > + ret = (virDomainCheckpointGetCurrent(vm->checkpoints) != NULL); > + > + cleanup: > + virDomainObjEndAPI(&vm); > + return ret; > +} > + > + [...] > +static char * > +qemuDomainCheckpointGetXMLDesc(virDomainCheckpointPtr checkpoint, > + unsigned int flags) > +{ > + virQEMUDriverPtr driver = checkpoint->domain->conn->privateData; > + virDomainObjPtr vm = NULL; > + char *xml = NULL; > + virDomainMomentObjPtr chk = NULL; > + qemuDomainObjPrivatePtr priv; > + int rc; > + size_t i; > + virDomainCheckpointDefPtr chkdef; > + > + virCheckFlags(VIR_DOMAIN_CHECKPOINT_XML_SECURE | > + VIR_DOMAIN_CHECKPOINT_XML_NO_DOMAIN | > + VIR_DOMAIN_CHECKPOINT_XML_SIZE, NULL); > + > + if (!(vm = qemuDomObjFromCheckpoint(checkpoint))) > + return NULL; > + > + if (virDomainCheckpointGetXMLDescEnsureACL(checkpoint->domain->conn, vm->def, flags) < 0) > + goto cleanup; > + > + if (!(chk = qemuCheckObjFromCheckpoint(vm, checkpoint))) > + goto cleanup; > + chkdef = virDomainCheckpointObjGetDef(chk); > + > + if (flags & VIR_DOMAIN_CHECKPOINT_XML_SIZE) { > + /* TODO: for non-current checkpoint, this requires a QMP sequence per > + disk, since the stat of one bitmap in isolation is too low, > + and merely adding bitmap sizes may be too high: > + block-dirty-bitmap-create tmp > + for each bitmap from checkpoint to current: > + add bitmap to src_list > + block-dirty-bitmap-merge dst=tmp src_list > + query-block and read tmp size > + block-dirty-bitmap-remove tmp > + So for now, go with simpler query-blocks only for current. > + */ > + if (virDomainCheckpointGetCurrent(vm->checkpoints) != chk) { > + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, > + _("cannot compute size for non-current checkpoint '%s'"), > + checkpoint->name); > + goto cleanup; > + } > + > + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0) > + goto cleanup; > + > + if (virDomainObjCheckActive(vm) < 0) > + goto endjob; > + > + if (qemuBlockNodeNamesDetect(driver, vm, QEMU_ASYNC_JOB_NONE) < 0) > + goto endjob; > + > + /* TODO: Shouldn't need to recompute node names. */ Node names will not change for a individual image ... > + for (i = 0; i < chkdef->ndisks; i++) { > + virDomainCheckpointDiskDef *disk = &chkdef->disks[i]; > + > + if (disk->type != VIR_DOMAIN_CHECKPOINT_TYPE_BITMAP) > + continue; > + VIR_FREE(chk->def->dom->disks[disk->idx]->src->nodeformat); > + if (VIR_STRDUP(chk->def->dom->disks[disk->idx]->src->nodeformat, ... unlike disk index. > + qemuBlockNodeLookup(vm, disk->name)) < 0) > + goto endjob; > + } > + > + priv = vm->privateData; > + qemuDomainObjEnterMonitor(driver, vm); > + rc = qemuMonitorUpdateCheckpointSize(priv->mon, chkdef); > + if (qemuDomainObjExitMonitor(driver, vm) < 0) > + goto endjob; > + if (rc < 0) > + goto endjob; > + } > + > + xml = virDomainCheckpointDefFormat(chkdef, driver->caps, driver->xmlopt, > + flags); > + > + endjob: > + if (flags & VIR_DOMAIN_CHECKPOINT_XML_SIZE) > + qemuDomainObjEndJob(driver, vm); > + > + cleanup: > + virDomainObjEndAPI(&vm); > + return xml; > +} [...] > +static int > +qemuDomainCheckpointHasMetadata(virDomainCheckpointPtr checkpoint, > + unsigned int flags) > +{ > + virDomainObjPtr vm = NULL; > + int ret = -1; > + virDomainMomentObjPtr chk = NULL; > + > + virCheckFlags(0, -1); > + > + if (!(vm = qemuDomObjFromCheckpoint(checkpoint))) > + return -1; > + > + if (virDomainCheckpointHasMetadataEnsureACL(checkpoint->domain->conn, vm->def) < 0) > + goto cleanup; > + > + if (!(chk = qemuCheckObjFromCheckpoint(vm, checkpoint))) > + goto cleanup; > + > + /* XXX Someday, we should recognize internal bitmaps in qcow2 > + * images that are not tied to a libvirt checkpoint; if we ever do > + * that, then we would have a reason to return 0 here. */ I don't think we should. While here it would probably suck less than with snapshots I don't think we should put the effort in allowing to work with non-libvirt initiated checkpoints. > + ret = 1; > + > + cleanup: > + virDomainObjEndAPI(&vm); > + return ret; > +} [...] > static int qemuDomainQemuMonitorCommand(virDomainPtr domain, const char *cmd, > char **result, unsigned int flags) > { [...] > @@ -16903,6 +17718,16 @@ static virDomainPtr qemuDomainQemuAttach(virConnectPtr conn, > goto cleanup; > } > > + if (qemuProcessAttach(conn, driver, vm, pid, > + pidfile, monConfig, monJSON) < 0) { > + monConfig = NULL; > + qemuDomainRemoveInactive(driver, vm); > + qemuDomainObjEndJob(driver, vm); > + goto cleanup; > + } > + > + monConfig = NULL; > + What's this for? > if (qemuProcessAttach(conn, driver, vm, pid, > pidfile, monConfig, monJSON) < 0) { > qemuDomainRemoveInactive(driver, vm);
Attachment:
signature.asc
Description: PGP signature
-- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list