Currently the lxc controller process itself is responsible for daemonizing itself into the background and writing out its pid file. The lxc driver would fork the controller and then attempt to connect to the lxc monitor. This connection would only succeed after the controller had backgrounded itself, set up cgroups and written its pid file, so startup was race free. The problem is that we need to delay creation of the cgroups to much later, such that we can tell systemd the container init pid when we create the cgroups. If we delay cgroup creation, though, the current synchronization won't work. A second problem is that the controller needs the XML config of the guest. Currently we write out the plain virDomainDefPtr XML before starting the controller, and then later replace it with the full virDomainObjPtr status XML. This is kind of gross and also means that the controller doesn't get a record of the live XML config right away. This means it doesn't have a record of the veth device names either and so can't give that info to systemd when creating the cgroups. To address this we change the startup sequencing. The goal is that we want to get the PID as soon as possible, before the LXC controller even starts. So we stop letting the LXC controller daemonize itself, and instead use virCommand's built-in capabilities. This daemonizes and writes the PID before the LXC controller is exec'd. So the driver can read the PID as soon as virCommandRun returns. It is no longer safe to connect to the monitor or detect the cgroups at that point, though. Fortunately the LXC controller already has a second point of synchronization. Immediately before its event loop starts running, it performs a handshake with the driver. So we move the opening of the monitor connection and cgroup detection to after this synchronization point. 
--- src/lxc/lxc_process.c | 88 +++++++++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 41 deletions(-) diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index 6bdfe3d..764cdab 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -750,7 +750,9 @@ virLXCProcessBuildControllerCmd(virLXCDriverPtr driver, size_t nttyFDs, int *files, size_t nfiles, - int handshakefd) + int handshakefd, + int logfd, + const char *pidfile) { size_t i; char *filterstr; @@ -812,12 +814,15 @@ virLXCProcessBuildControllerCmd(virLXCDriverPtr driver, virCommandAddArg(cmd, "--handshake"); virCommandAddArgFormat(cmd, "%d", handshakefd); - virCommandAddArg(cmd, "--background"); for (i = 0; i < nveths; i++) virCommandAddArgList(cmd, "--veth", veths[i], NULL); virCommandPassFD(cmd, handshakefd, 0); + virCommandDaemonize(cmd); + virCommandSetPidFile(cmd, pidfile); + virCommandSetOutputFD(cmd, &logfd); + virCommandSetErrorFD(cmd, &logfd); return cmd; cleanup: @@ -1189,10 +1194,10 @@ int virLXCProcessStart(virConnectPtr conn, nveths, veths, ttyFDs, nttyFDs, files, nfiles, - handshakefds[1]))) + handshakefds[1], + logfd, + pidfile))) goto cleanup; - virCommandSetOutputFD(cmd, &logfd); - virCommandSetErrorFD(cmd, &logfd); /* now that we know it is about to start call the hook if present */ if (virHookPresent(VIR_HOOK_DRIVER_LXC)) { @@ -1245,28 +1250,7 @@ int virLXCProcessStart(virConnectPtr conn, goto cleanup; } - - if (VIR_CLOSE(handshakefds[1]) < 0) { - virReportSystemError(errno, "%s", _("could not close handshake fd")); - goto cleanup; - } - - /* Connect to the controller as a client *first* because - * this will block until the child has written their - * pid file out to disk & created their cgroup */ - if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm))) { - /* Intentionally overwrite the real monitor error message, - * since a better one is almost always found in the logs - */ - if (virLXCProcessReadLogOutput(vm, logfile, pos, ebuf, 
sizeof(ebuf)) > 0) { - virResetLastError(); - virReportError(VIR_ERR_INTERNAL_ERROR, - _("guest failed to start: %s"), ebuf); - } - goto cleanup; - } - - /* And get its pid */ + /* It has started running, so get its pid */ if ((r = virPidFileReadPath(pidfile, &vm->pid)) < 0) { if (virLXCProcessReadLogOutput(vm, logfile, pos, ebuf, sizeof(ebuf)) > 0) virReportError(VIR_ERR_INTERNAL_ERROR, @@ -1278,26 +1262,17 @@ int virLXCProcessStart(virConnectPtr conn, goto cleanup; } - if (virCgroupNewDetectMachine(vm->def->name, "lxc", vm->pid, - vm->def->resource ? - vm->def->resource->partition : - NULL, - -1, &priv->cgroup) < 0) - goto error; - - if (!priv->cgroup) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("No valid cgroup for machine %s"), - vm->def->name); - goto error; - } - priv->stopReason = VIR_DOMAIN_EVENT_STOPPED_FAILED; priv->wantReboot = false; vm->def->id = vm->pid; virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason); priv->doneStopEvent = false; + if (VIR_CLOSE(handshakefds[1]) < 0) { + virReportSystemError(errno, "%s", _("could not close handshake fd")); + goto error; + } + if (virAtomicIntInc(&driver->nactive) == 1 && driver->inhibitCallback) driver->inhibitCallback(true, driver->inhibitOpaque); @@ -1312,6 +1287,37 @@ int virLXCProcessStart(virConnectPtr conn, goto error; } + /* We know the cgroup must exist by this synchronization + * point so lets detect that first, since it gives us a + * more reliable way to kill everything off if something + * goes wrong from here onwards ... */ + if (virCgroupNewDetectMachine(vm->def->name, "lxc", vm->pid, + vm->def->resource ? 
+ vm->def->resource->partition : + NULL, + -1, &priv->cgroup) < 0) + goto error; + + if (!priv->cgroup) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("No valid cgroup for machine %s"), + vm->def->name); + goto error; + } + + /* And we can get the first monitor connection now too */ + if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm))) { + /* Intentionally overwrite the real monitor error message, + * since a better one is almost always found in the logs + */ + if (virLXCProcessReadLogOutput(vm, logfile, pos, ebuf, sizeof(ebuf)) > 0) { + virResetLastError(); + virReportError(VIR_ERR_INTERNAL_ERROR, + _("guest failed to start: %s"), ebuf); + } + goto error; + } + if (autoDestroy && virCloseCallbacksSet(driver->closeCallbacks, vm, conn, lxcProcessAutoDestroy) < 0) -- 2.1.0 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list