Markus Groß wrote: > Am Montag 23 Mai 2011 04:30:12 schrieb Jim Fehlig: > >> Jim Fehlig wrote: >> >>> Markus Groß wrote: >>> >>> >>>> This patch adds save/restore functionality to the libxl driver. >>>> >>>> It is a v2 of this patch: >>>> https://www.redhat.com/archives/libvir-list/2011-April/msg00338.html >>>> >>>> v2: >>>> * header is now padded and has a version field >>>> * the correct restore function from libxl is used >>>> * only create the restore event once in libxlVmStart >>>> >>>> >>>> >>> Hi Markus, >>> >>> Finally found time to try your patch. Thanks for the patience :-). >>> >>> >>> >>>> However I ran into a reproducible segfault. >>>> Assume you saved a vm with: >>>> # virsh save domU foo.img >>>> >>>> >>>> >>> I think the problem actually lies in the save function. The domain does >>> not appear to be cleaned up properly. From xl's perspective after virsh >>> save domU foo.img >>> >>> xen33: # xl list >>> Name ID Mem VCPUs State >>> Time(s) >>> Domain-0 0 2023 8 >>> r----- 330.0 >>> (null) 11 1 >>> 2 --pssd 27.1 >>> >>> The orphaned domain disappears after libvirtd restart. >>> >> I manged to track down this problem, patch posted to xen-devel >> >> http://lists.xensource.com/archives/html/xen-devel/2011-05/msg01314.html >> >> > > Great! I attached the current version of the save/restore patch. > It is rebased against the current master. > Ok, thanks, I have some comments below. > >>> >>> >>>> If you restore the save image, >>>> destroy the vm and restore it again, a segfault occurs: >>>> # virsh restore foo.img >>>> # virsh destroy domU >>>> # virsh restore foo.img >>>> # segfault >>>> >>>> >> But I still see the segfault, in addition to domain not booting and >> consuming 100% cpu on first restore :-(. I'll look at these issues next. >> I've identified cause of both of these issues. For the domain not _restoring_ (booting doesn't make much sense on a restore) and consuming 100% cpu, I've posted this patch (which still needs some work) https://www.redhat.com/archives/libvir-list/2011-May/msg01450.html The segfault ended up being a bug in libxc. I've posted a fix to xen-devel http://lists.xensource.com/archives/html/xen-devel/2011-05/msg01511.html > diff --git a/src/libxl/libxl_conf.h b/src/libxl/libxl_conf.h > index 65110cf..e75e418 100644 > --- a/src/libxl/libxl_conf.h > +++ b/src/libxl/libxl_conf.h > @@ -1,5 +1,6 @@ > /*---------------------------------------------------------------------------*/ > /* Copyright (c) 2011 SUSE LINUX Products GmbH, Nuernberg, Germany. > + * Copyright (C) 2011 Univention GmbH. > * > * This library is free software; you can redistribute it and/or > * modify it under the terms of the GNU Lesser General Public > @@ -17,6 +18,7 @@ > * > * Authors: > * Jim Fehlig <jfehlig@xxxxxxxxxx> > + * Markus Groß <gross@xxxxxxxxxxxxx> > */ > /*---------------------------------------------------------------------------*/ > > @@ -81,6 +83,18 @@ struct _libxlDomainObjPrivate { > int eventHdl; > }; > > +#define LIBXL_SAVE_MAGIC "libvirt-xml\n \0 \r" > +#define LIBXL_SAVE_VERSION 1 > + > +typedef struct _libxlSavefileHeader libxlSavefileHeader; > +typedef libxlSavefileHeader *libxlSavefileHeaderPtr; > +struct _libxlSavefileHeader { > + char magic[sizeof(LIBXL_SAVE_MAGIC)-1]; > + uint32_t version; > + uint32_t xmlLen; > + /* 24 bytes used, pad up to 64 bytes */ > + uint32_t unused[10]; > +}; > > # define libxlError(code, ...) \ > virReportErrorHelper(VIR_FROM_LIBXL, code, __FILE__, \ > diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c > index b2cc0e8..9bc71fd 100644 > --- a/src/libxl/libxl_driver.c > +++ b/src/libxl/libxl_driver.c > @@ -29,6 +29,7 @@ > #include <sys/utsname.h> > #include <math.h> > #include <libxl.h> > +#include <fcntl.h> > > #include "internal.h" > #include "logging.h" > @@ -60,11 +61,10 @@ > > static libxlDriverPrivatePtr libxl_driver = NULL; > > - > /* Function declarations */ > static int > -libxlVmStart(libxlDriverPrivatePtr driver, > - virDomainObjPtr vm, bool start_paused); > +libxlVmStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm, > + bool start_paused, int restore_fd); > > > /* Function definitions */ > @@ -168,7 +168,7 @@ libxlAutostartDomain(void *payload, const void *name ATTRIBUTE_UNUSED, > virResetLastError(); > > if (vm->autostart && !virDomainObjIsActive(vm) && > - libxlVmStart(driver, vm, false) < 0) { > + libxlVmStart(driver, vm, false, -1) < 0) { > err = virGetLastError(); > VIR_ERROR(_("Failed to autostart VM '%s': %s"), > vm->def->name, > @@ -369,7 +369,7 @@ static void libxlEventHandler(int watch, > break; > case SHUTDOWN_reboot: > libxlVmReap(driver, vm, 0, VIR_DOMAIN_SHUTOFF_SHUTDOWN); > - libxlVmStart(driver, vm, 0); > + libxlVmStart(driver, vm, 0, -1); > break; > You'll need to add a 'case SHUTDOWN_suspend' here and just call libxlVmCleanup(). > default: > VIR_INFO("Unhandled shutdown_reason %d", info.shutdown_reason); > @@ -535,8 +535,8 @@ libxlFreeMem(libxlDomainObjPrivatePtr priv, libxl_domain_config *d_config) > * virDomainObjPtr should be locked on invocation > */ > static int > -libxlVmStart(libxlDriverPrivatePtr driver, > - virDomainObjPtr vm, bool start_paused) > +libxlVmStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm, > + bool start_paused, int restore_fd) > { > libxl_domain_config d_config; > virDomainDefPtr def = vm->def; > @@ -559,12 +559,23 @@ libxlVmStart(libxlDriverPrivatePtr driver, > goto error; > } > > - ret = libxl_domain_create_new(&priv->ctx, &d_config, > - NULL, &child_console_pid, &domid); > + if (restore_fd < 0) > + ret = libxl_domain_create_new(&priv->ctx, &d_config, > + NULL, &child_console_pid, &domid); > + else > + ret = libxl_domain_create_restore(&priv->ctx, &d_config, NULL, > + &child_console_pid, &domid, > + restore_fd); > + > if (ret) { > - libxlError(VIR_ERR_INTERNAL_ERROR, > - _("libxenlight failed to create new domain '%s'"), > - d_config.c_info.name); > + if (restore_fd < 0) > + libxlError(VIR_ERR_INTERNAL_ERROR, > + _("libxenlight failed to create new domain '%s'"), > + d_config.c_info.name); > + else > + libxlError(VIR_ERR_INTERNAL_ERROR, > + _("libxenlight failed to restore domain '%s'"), > + d_config.c_info.name); > goto error; > } > > @@ -597,7 +608,9 @@ libxlVmStart(libxlDriverPrivatePtr driver, > goto error; > > event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STARTED, > - VIR_DOMAIN_EVENT_STARTED_BOOTED); > + restore_fd < 0 ? > + VIR_DOMAIN_EVENT_STARTED_BOOTED : > + VIR_DOMAIN_EVENT_STARTED_RESTORED); > libxlDomainEventQueue(driver, event); > > libxl_domain_config_destroy(&d_config); > @@ -1075,7 +1088,8 @@ libxlDomainCreateXML(virConnectPtr conn, const char *xml, > goto cleanup; > def = NULL; > > - if (libxlVmStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0) < 0) { > + if (libxlVmStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0, > + -1) < 0) { > virDomainRemoveInactive(&driver->domains, vm); > vm = NULL; > goto cleanup; > @@ -1627,6 +1641,184 @@ libxlDomainGetState(virDomainPtr dom, > } > > static int > +libxlDomainSave(virDomainPtr dom, const char *to) > +{ > + libxlDriverPrivatePtr driver = dom->conn->privateData; > + virDomainObjPtr vm; > + libxlDomainObjPrivatePtr priv; > + virDomainEventPtr event = NULL; > + libxlSavefileHeader hdr; > + char *xml; > + uint32_t xml_len; > + int fd; > + int ret = -1; > + > + libxlDriverLock(driver); > + vm = virDomainFindByUUID(&driver->domains, dom->uuid); > + > + if (!vm) { > + char uuidstr[VIR_UUID_STRING_BUFLEN]; > + virUUIDFormat(dom->uuid, uuidstr); > + libxlError(VIR_ERR_NO_DOMAIN, > + _("No domain with matching uuid '%s'"), uuidstr); > + goto cleanup; > + } > + > + if (!virDomainObjIsActive(vm)) { > + libxlError(VIR_ERR_OPERATION_INVALID, "%s", _("Domain is not running")); > + goto cleanup; > + } > + > + priv = vm->privateData; > + > + if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_PAUSED) { > + > + if ((fd = virFileOpenAs(to, O_CREAT|O_TRUNC|O_WRONLY, S_IRUSR|S_IWUSR, > + getuid(), getgid(), 0)) < 0) { > + virReportSystemError(-fd, > + _("Failed to create domain save file '%s'"), > + to); > + goto cleanup; > + } > + > + if ((xml = virDomainDefFormat(vm->def, 0)) == NULL) > + goto cleanup; > + xml_len = strlen(xml) + 1; > + > + memset(&hdr, 0, sizeof(hdr)); > + memcpy(hdr.magic, LIBXL_SAVE_MAGIC, sizeof(hdr.magic)); > + hdr.version = LIBXL_SAVE_VERSION; > + hdr.xmlLen = xml_len; > + > + if (safewrite(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) { > + libxlError(VIR_ERR_OPERATION_FAILED, > + _("Failed to write save file header")); > + goto cleanup; > + } > + > + if (safewrite(fd, xml, xml_len) != xml_len) { > + libxlError(VIR_ERR_OPERATION_FAILED, > + _("Failed to write xml description")); > + goto cleanup; > + } > + > + if (libxl_domain_suspend(&priv->ctx, NULL, dom->id, fd) != 0) { > + libxlError(VIR_ERR_INTERNAL_ERROR, > + _("Failed to save domain '%d' with libxenlight"), > + dom->id); > + goto cleanup; > + } > + > + event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, > + VIR_DOMAIN_EVENT_STOPPED_SAVED); > + > + if (libxlVmReap(driver, vm, 1, VIR_DOMAIN_SHUTOFF_SAVED) != 0) { > + libxlError(VIR_ERR_INTERNAL_ERROR, > + _("Failed to destroy domain '%d'"), dom->id); > + goto cleanup; > + } > And here call libxl_domain_destroy() directly instead of libxlVmReap(), allowing the event handler to cleanup the vm. Can you make these changes and ensure all the save/restore issues are resolved in your environment? Regards, Jim > + > + if (!vm->persistent) { > + virDomainRemoveInactive(&driver->domains, vm); > + vm = NULL; > + } > + ret = 0; > + } > +cleanup: > + VIR_FREE(xml); > + if (VIR_CLOSE(fd) < 0) > + virReportSystemError(errno, "%s", _("cannot close file")); > + if (vm) > + virDomainObjUnlock(vm); > + if (event) > + libxlDomainEventQueue(driver, event); > + libxlDriverUnlock(driver); > + return ret; > +} > + > +static int > +libxlDomainRestore(virConnectPtr conn, const char *from) > +{ > + libxlDriverPrivatePtr driver = conn->privateData; > + virDomainDefPtr def = NULL; > + virDomainObjPtr vm = NULL; > + libxlSavefileHeader hdr; > + char *xml = NULL; > + int fd; > + int ret = -1; > + > + libxlDriverLock(driver); > + > + if ((fd = virFileOpenAs(from, O_RDONLY, 0, getuid(), getgid(), 0)) < 0) { > + libxlError(VIR_ERR_OPERATION_FAILED, > + "%s", _("cannot read domain image")); > + goto cleanup; > + } > + > + if (saferead(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) { > + libxlError(VIR_ERR_OPERATION_FAILED, > + "%s", _("failed to read libxl header")); > + goto cleanup; > + } > + > + if (memcmp(hdr.magic, LIBXL_SAVE_MAGIC, sizeof(hdr.magic))) { > + libxlError(VIR_ERR_INVALID_ARG, "%s", _("image magic is incorrect")); > + goto cleanup; > + } > + > + if (hdr.version > LIBXL_SAVE_VERSION) { > + libxlError(VIR_ERR_OPERATION_FAILED, > + _("image version is not supported (%d > %d)"), > + hdr.version, LIBXL_SAVE_VERSION); > + goto cleanup; > + } > + > + if (hdr.xmlLen <= 0) { > + libxlError(VIR_ERR_OPERATION_FAILED, > + _("invalid XML length: %d"), hdr.xmlLen); > + goto cleanup; > + } > + > + if (VIR_ALLOC_N(xml, hdr.xmlLen) < 0) { > + virReportOOMError(); > + goto cleanup; > + } > + > + if (saferead(fd, xml, hdr.xmlLen) != hdr.xmlLen) { > + libxlError(VIR_ERR_OPERATION_FAILED, "%s", _("failed to read XML")); > + goto cleanup; > + } > + > + if (!(def = virDomainDefParseString(driver->caps, xml, > + VIR_DOMAIN_XML_INACTIVE))) > + goto cleanup; > + > + if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0) > + goto cleanup; > + > + if (!(vm = virDomainAssignDef(driver->caps, &driver->domains, def, true))) > + goto cleanup; > + > + def = NULL; > + > + if ((ret = libxlVmStart(driver, vm, false, fd)) < 0 && > + !vm->persistent) { > + virDomainRemoveInactive(&driver->domains, vm); > + vm = NULL; > + } > + > +cleanup: > + VIR_FREE(xml); > + virDomainDefFree(def); > + if (VIR_CLOSE(fd) < 0) > + virReportSystemError(errno, "%s", _("cannot close file")); > + if (vm) > + virDomainObjUnlock(vm); > + libxlDriverUnlock(driver); > + return ret; > +} > + > +static int > libxlDomainSetVcpusFlags(virDomainPtr dom, unsigned int nvcpus, > unsigned int flags) > { > @@ -2096,7 +2288,7 @@ libxlDomainCreateWithFlags(virDomainPtr dom, > goto cleanup; > } > > - ret = libxlVmStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0); > + ret = libxlVmStart(driver, vm, (flags & VIR_DOMAIN_START_PAUSED) != 0, -1); > > cleanup: > if (vm) > @@ -2722,6 +2914,8 @@ static virDriver libxlDriver = { > .domainSetMemoryFlags = libxlDomainSetMemoryFlags, /* 0.9.0 */ > .domainGetInfo = libxlDomainGetInfo, /* 0.9.0 */ > .domainGetState = libxlDomainGetState, /* 0.9.2 */ > + .domainSave = libxlDomainSave, /* 0.9.2 */ > + .domainRestore = libxlDomainRestore, /* 0.9.2 */ > .domainSetVcpus = libxlDomainSetVcpus, /* 0.9.0 */ > .domainSetVcpusFlags = libxlDomainSetVcpusFlags, /* 0.9.0 */ > .domainGetVcpusFlags = libxlDomainGetVcpusFlags, /* 0.9.0 */ > -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list