On Wed, May 27, 2009 at 04:58:44PM +0100, Daniel P. Berrange wrote: > When the libvirtd daemon starts up, it reads all config files from > /etc/libvirt/qemu. For each config file loaded, it then probes for > a pidfile / live status XML config in /var/run/libvirt/qemu. > > In retrospect there is an obvious problem with this approach: it misses > all transient VMs which don't ever have config files in /etc/libvirt/qemu. > The core goal of this patch, is thus to change the way we deal with startup > to apply the following logic. > > - Scan & load all status files in /var/run/libvirt/qemu for running VMs > - Reconnect to the monitor for all VMs, and re-detect VCPU threads. > - Scan & load all inactive configs in /etc/libvirt/qemu > > This ensures we always re-detect all inactive and running VMs, whether > transient or persistent. > > It also fixes two other bugs, first we forgot to detect VCPU threads, > so vCPU affinity functions were broken, and it also re-reserves the MCS > category with selinux so it is not re-used later. > > Finally, if the attempt to reconnect to the QEMU monitor for a running > VM fails, then we explicitly kill off that VM, rather than just ignoring > it. This avoids leaving orphaned QEMU processes. > > The patch is larger than anticipated, because I moved all the status Well that's a lot of changes too ! But this sounds good. Maybe we shoudl try some testing like repeating /etc/init.d/libvirtd start and stop a few hundred times while keeping some activity and see what's left at the end ... > file code out of QEMU driver into the generic domain_conf.c file. This > enables both scanning loops to be done via the single method > virDomainLoadAllConfigs, just by pasing different directories. I also > want to re-use this code in the LXC and UML drivers to improve bugs in > their logic okay, so there is some general refactoring too. [...] > + xmlXPathContextPtr ctxt) > +{ > + char *tmp = NULL; > + long val; > + xmlNodePtr config; > + xmlNodePtr oldctxt; I would s/oldctxt/oldnode/ as what is saved is really only the old XPath current node not the context itself. [...] > +char *virDomainObjFormat(virConnectPtr conn, > + virDomainObjPtr obj, > + int flags) > +{ > + char *config_xml = NULL, *xml = NULL; > + virBuffer buf = VIR_BUFFER_INITIALIZER; > + > + virBufferVSprintf(&buf, "<domstatus state='%s' pid='%d'>\n", > + virDomainStateTypeToString(obj->state), > + obj->pid); > + virBufferEscapeString(&buf, " <monitor path='%s'/>\n", obj->monitorpath); > + > + if (!(config_xml = virDomainDefFormat(conn, > + obj->def, > + flags))) Hum we are leaking the buffer content here. > + goto cleanup; > + > + virBufferAdd(&buf, config_xml, strlen(config_xml)); > + virBufferAddLit(&buf, "</domstatus>\n"); > + > + xml = virBufferContentAndReset(&buf); > +cleanup: > + VIR_FREE(config_xml); > + return xml; > + > +} [...] > +static virDomainObjPtr virDomainLoadStatus(virConnectPtr conn, > + virCapsPtr caps, > + virDomainObjListPtr doms, > + const char *statusDir, > + const char *name, > + virDomainLoadConfigNotify notify, > + void *opaque) > +{ > + char *statusFile = NULL; > + virDomainObjPtr obj = NULL; > + virDomainObjPtr tmp = NULL; > + > + if ((statusFile = virDomainConfigFile(conn, statusDir, name)) == NULL) > + goto error; > + > + if (!(obj = virDomainObjParseFile(conn, caps, statusFile))) > + goto error; > + > + tmp = virDomainFindByName(doms, obj->def->name); > + if (tmp) { > + virDomainObjUnlock(obj); > + virDomainReportError(conn, VIR_ERR_INTERNAL_ERROR, > + _("unexpected domain %s already exists"), obj->def->name); let's wrap to 80 columns [...] > +/* > + * Open an existing VM's monitor, and re-detect VCPUs > + * threads maybe update the comment about the security labels too, especially as this is a bit arcane. > + */ > +static int > +qemuReconnectDomain(struct qemud_driver *driver, > + virDomainObjPtr obj) > +{ > + int rc; > + > + if ((rc = qemudOpenMonitor(NULL, driver, obj, obj->monitorpath, 1)) != 0) { > + VIR_ERROR(_("Failed to reconnect monitor for %s: %d\n"), > + obj->def->name, rc); > + goto error; > + } > + > + if (qemudDetectVcpuPIDs(NULL, obj) < 0) { > + goto error; > + } > + > + if (obj->def->seclabel.type == VIR_DOMAIN_SECLABEL_DYNAMIC && > + driver->securityDriver && > + driver->securityDriver->domainReserveSecurityLabel && > + driver->securityDriver->domainReserveSecurityLabel(NULL, obj) < 0) > + return -1; > + > + if (obj->def->id >= driver->nextvmid) > + driver->nextvmid = obj->def->id + 1; > + > + return 0; > + > +error: > + return -1; > +} > + [...] > @@ -1331,6 +1330,7 @@ static int qemudStartVMDaemon(virConnect > int pos = -1; > char ebuf[1024]; > char *pidfile = NULL; > + int logfile; > > struct gemudHookData hookData; > hookData.conn = conn; > @@ -1372,7 +1372,7 @@ static int qemudStartVMDaemon(virConnect > goto cleanup; > } > > - if((vm->logfile = qemudLogFD(conn, driver->logDir, vm->def->name)) < 0) > + if ((logfile = qemudLogFD(conn, driver->logDir, vm->def->name)) < 0) > goto cleanup; > > emulator = vm->def->emulator; > @@ -1421,29 +1421,29 @@ static int qemudStartVMDaemon(virConnect > > tmp = progenv; > while (*tmp) { > - if (safewrite(vm->logfile, *tmp, strlen(*tmp)) < 0) > + if (safewrite(logfile, *tmp, strlen(*tmp)) < 0) > VIR_WARN(_("Unable to write envv to logfile: %s\n"), > virStrerror(errno, ebuf, sizeof ebuf)); > - if (safewrite(vm->logfile, " ", 1) < 0) > + if (safewrite(logfile, " ", 1) < 0) > VIR_WARN(_("Unable to write envv to logfile: %s\n"), > virStrerror(errno, ebuf, sizeof ebuf)); > tmp++; > } > tmp = argv; > while (*tmp) { > - if (safewrite(vm->logfile, *tmp, strlen(*tmp)) < 0) > + if (safewrite(logfile, *tmp, strlen(*tmp)) < 0) > VIR_WARN(_("Unable to write argv to logfile: %s\n"), > virStrerror(errno, ebuf, sizeof ebuf)); > - if (safewrite(vm->logfile, " ", 1) < 0) > + if (safewrite(logfile, " ", 1) < 0) > VIR_WARN(_("Unable to write argv to logfile: %s\n"), > virStrerror(errno, ebuf, sizeof ebuf)); > tmp++; > } > - if (safewrite(vm->logfile, "\n", 1) < 0) > + if (safewrite(logfile, "\n", 1) < 0) > VIR_WARN(_("Unable to write argv to logfile: %s\n"), > virStrerror(errno, ebuf, sizeof ebuf)); > > - if ((pos = lseek(vm->logfile, 0, SEEK_END)) < 0) > + if ((pos = lseek(logfile, 0, SEEK_END)) < 0) > VIR_WARN(_("Unable to seek to end of logfile: %s\n"), > virStrerror(errno, ebuf, sizeof ebuf)); > > @@ -1451,7 +1451,7 @@ static int qemudStartVMDaemon(virConnect > FD_SET(tapfds[i], &keepfd); > > ret = virExecDaemonize(conn, argv, progenv, &keepfd, &child, > - stdin_fd, &vm->logfile, &vm->logfile, > + stdin_fd, &logfile, &logfile, > VIR_EXEC_NONBLOCK, > qemudSecurityHook, &hookData, > pidfile); > @@ -1501,7 +1501,7 @@ static int qemudStartVMDaemon(virConnect > (qemudInitCpus(conn, vm, migrateFrom) < 0) || > (qemudInitPasswords(conn, driver, vm) < 0) || > (qemudDomainSetMemoryBalloon(conn, vm, vm->def->memory) < 0) || > - (qemudSaveDomainStatus(conn, qemu_driver, vm) < 0)) { > + (virDomainSaveStatus(conn, driver->stateDir, vm) < 0)) { > qemudShutdownVMDaemon(conn, driver, vm); > ret = -1; > /* No need for 'goto cleanup' now since qemudShutdownVMDaemon does enough */ > @@ -1519,10 +1519,8 @@ cleanup: > vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC && > vm->def->graphics[0]->data.vnc.autoport) > vm->def->graphics[0]->data.vnc.port = -1; > - if (vm->logfile != -1) { > - close(vm->logfile); > - vm->logfile = -1; > - } > + if (logfile != -1) > + close(logfile); > vm->def->id = -1; > return -1; > } Hum, do we still use vm->logfile field then ? Maybe I didn't see the place where it was removed from the structure. Looks good except for the couple of things raised earlier, thanks ! ACK Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@xxxxxxxxxxxx | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/ -- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list