Markus Groà wrote: > Quoting Jim Fehlig <jfehlig@xxxxxxxxxx>: > >> Markus Groà wrote: >>> For core dumping to work correctly the following patch >>> for xen is needed: >>> http://lists.xensource.com/archives/html/xen-devel/2011-05/msg01469.html >>> >>> >>> This patch is in xen-unstable and is considered for backport to >>> the xen stable branches. Without this patch the mapped >>> memory pages of the pv guest are not unmapped after core-dump. >>> >>> --- >>> src/libxl/libxl_driver.c | 87 >>> ++++++++++++++++++++++++++++++++++++++++++++++ >>> 1 files changed, 87 insertions(+), 0 deletions(-) >>> >>> diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c >>> index b2cc0e8..75008db 100644 >>> --- a/src/libxl/libxl_driver.c >>> +++ b/src/libxl/libxl_driver.c >>> @@ -1627,6 +1627,92 @@ libxlDomainGetState(virDomainPtr dom, >>> } >>> >>> static int >>> +libxlDomainCoreDump(virDomainPtr dom, const char *to, int flags) >>> +{ >>> + libxlDriverPrivatePtr driver = dom->conn->privateData; >>> + libxlDomainObjPrivatePtr priv; >>> + virDomainObjPtr vm; >>> + virDomainEventPtr event = NULL; >>> + int paused = 0; >>> + int ret = -1; >>> + >>> + virCheckFlags(VIR_DUMP_LIVE | VIR_DUMP_CRASH, -1); >>> + >>> + libxlDriverLock(driver); >>> + vm = virDomainFindByUUID(&driver->domains, dom->uuid); >>> + >>> + if (!vm) { >>> + char uuidstr[VIR_UUID_STRING_BUFLEN]; >>> + virUUIDFormat(dom->uuid, uuidstr); >>> + libxlError(VIR_ERR_NO_DOMAIN, >>> + _("No domain with matching uuid '%s'"), uuidstr); >>> + goto cleanup; >>> + } >>> + >>> + if (!virDomainObjIsActive(vm)) { >>> + libxlError(VIR_ERR_OPERATION_INVALID, "%s", _("Domain is >>> not running")); >>> + goto cleanup; >>> + } >>> + >>> + priv = vm->privateData; >>> + >>> + if (!(flags & VIR_DUMP_LIVE) && >>> + virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { >>> + if (libxl_domain_pause(&priv->ctx, dom->id) != 0) { >>> + libxlError(VIR_ERR_INTERNAL_ERROR, >>> + _("Failed to suspend domain '%d' with >>> libxenlight"), >>> + dom->id); >>> >> >> I think there could be a little more info in that error message, e.g. >> "Before dumping core, failed to suspend ...". >> >>> + goto cleanup; >>> + } >>> + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, >>> VIR_DOMAIN_PAUSED_DUMP); >>> + paused = 1; >>> + } >>> + >>> + if (libxl_domain_core_dump(&priv->ctx, dom->id, to) != 0) { >>> + libxlError(VIR_ERR_INTERNAL_ERROR, >>> + _("Failed to dump core of domain '%d' with >>> libxenlight"), >>> + dom->id); >>> + goto cleanup; >>> + } >>> >> >> If core dumping fails and the domain was paused, it won't be unpaused by >> jumping to cleanup. >> >>> + >>> + if (flags & VIR_DUMP_CRASH) { >>> + if (libxlVmReap(driver, vm, 1, VIR_DOMAIN_SHUTOFF_CRASHED) >>> != 0) { >>> + libxlError(VIR_ERR_INTERNAL_ERROR, >>> + _("Failed to destroy domain '%d'"), dom->id); >>> + goto cleanup; >>> + } >>> + >>> + event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, >>> + >>> VIR_DOMAIN_EVENT_STOPPED_CRASHED); >>> + >>> + } else if (paused) { >>> + if (libxl_domain_unpause(&priv->ctx, dom->id) != 0) { >>> + libxlError(VIR_ERR_INTERNAL_ERROR, >>> + _("Failed to resume domain '%d' with >>> libxenlight"), >>> + dom->id); >>> >> >> Here too more info in the error message, e.g. "After dumping core, >> failed to resume ..." >> >>> + goto cleanup; >>> + } >>> + virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, >>> + VIR_DOMAIN_RUNNING_UNPAUSED); >>> + } >>> + >>> + if ((flags & VIR_DUMP_CRASH) && !vm->persistent) { >>> + virDomainRemoveInactive(&driver->domains, vm); >>> + vm = NULL; >>> + } >>> + >>> + ret = 0; >>> + >>> +cleanup: >>> + if (vm) >>> + virDomainObjUnlock(vm); >>> + if (event) >>> + libxlDomainEventQueue(driver, event); >>> + libxlDriverUnlock(driver); >>> >> >> Do we need to have the driver locked during the core dump? Most of the >> driver functions use this pattern >> >> libxlDriverLock(driver); >> vm = virDomainFindByUUID(&driver->domains, dom->uuid); >> libxlDriverUnlock(driver); >> >> Regards, >> Jim > > I think we do, since the calls to libxlVmReap and > virDomainRemoveInactive both use the driver as parameter and at least > virDomainRemoveInactive is able to modify the contents of it. Right, good point. > But if you can convince me otherwise we can unlock the driver directly > after obtaining the vm object. A core dump could take quite some time on a large memory domain. How about unlocking before invoking this long running operation and then reacquiring? > > I will post a reworked version of this patch on monday. Ok, thanks! Monday is a US holiday, but hopefully I'll have some time in the evening to review this and your other patches. We're running out of time before 0.9.2 release ... Regards, Jim -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list