The current LXC I/O controller looks for HUP to detect when a guest has quit. This isn't reliable as during initial bootup it is possible that 'init' will close the console and let mingetty re-open it. The shutdown of containers was also flaky because it only killed the libvirt I/O controller and expected container processes to gracefully follow. Change the I/O controller such that when it sees HUP or an I/O error, it uses kill($PID, 0) to see if the process has really quit. Change the container shutdown sequence to use the virCgroupKillPainfully function to ensure every process really goes away. * docs/drvlxc.html.in: Document that certain cgroups controllers are now mandatory * src/lxc/lxc_controller.c: Check if PID is still alive before quitting on I/O error/HUP * src/lxc/lxc_driver.c: Use virCgroupKillPainfully --- docs/drvlxc.html.in | 18 ++++++ src/lxc/lxc_controller.c | 42 ++++++++++---- src/lxc/lxc_driver.c | 142 ++++++++++++++++------------------------------ 3 files changed, 98 insertions(+), 104 deletions(-) diff --git a/docs/drvlxc.html.in b/docs/drvlxc.html.in index 35058c4..3e715b1 100644 --- a/docs/drvlxc.html.in +++ b/docs/drvlxc.html.in @@ -9,6 +9,24 @@ light-weight "application container" which does not have it's own root image. Y start it using </p> +<h2>Cgroups Requirements</h2> + +<p> +The libvirt LXC driver requires that certain cgroups controllers are +mounted on the host OS. The minimum required controllers are 'cpuacct', +'memory' and 'devices', while recommended extra controllers are +'cpu', 'freezer' and 'blkio'. The /etc/cgconfig.conf & cgconfig +init service used to mount cgroups at host boot time. To manually +mount them use. NB, the blkio controller in some kernels will not +allow creation of nested sub-directories which will prevent correct +operation of the libvirt LXC driver. On such kernels the blkio controller +must not be mounted. 
+</p> + +<pre> + # mount -t cgroup cgroup /dev/cgroup -o cpuacct,memory,devices,cpu,freezer,blkio +</pre> + <h3>Example config version 1</h3> <p></p> <pre> diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index b742a33..0cd7dc4 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -31,6 +31,7 @@ #include <sys/un.h> #include <unistd.h> #include <paths.h> +#include <errno.h> #include <fcntl.h> #include <signal.h> #include <getopt.h> @@ -119,12 +120,10 @@ static int lxcSetContainerResources(virDomainDefPtr def) virReportSystemError(-rc, _("Unable to set memory limit for domain %s"), def->name); - /* Don't fail if we can't set memory due to lack of kernel support */ - if (rc != -ENOENT) - goto cleanup; + goto cleanup; } - if(def->mem.hard_limit) { + if (def->mem.hard_limit) { rc = virCgroupSetMemoryHardLimit(cgroup, def->mem.hard_limit); if (rc != 0) { virReportSystemError(-rc, @@ -134,7 +133,7 @@ static int lxcSetContainerResources(virDomainDefPtr def) } } - if(def->mem.soft_limit) { + if (def->mem.soft_limit) { rc = virCgroupSetMemorySoftLimit(cgroup, def->mem.soft_limit); if (rc != 0) { virReportSystemError(-rc, @@ -144,7 +143,7 @@ static int lxcSetContainerResources(virDomainDefPtr def) } } - if(def->mem.swap_hard_limit) { + if (def->mem.swap_hard_limit) { rc = virCgroupSetSwapHardLimit(cgroup, def->mem.swap_hard_limit); if (rc != 0) { virReportSystemError(-rc, @@ -323,6 +322,18 @@ ignorable_epoll_accept_errno(int errnum) || errnum == EWOULDBLOCK); } +static bool +lxcPidGone(pid_t container) +{ + waitpid(container, NULL, WNOHANG); + + if (kill(container, 0) < 0 && + errno == ESRCH) + return true; + + return false; +} + /** * lxcControllerMain * @monitor: server socket fd to accept client requests @@ -340,7 +351,8 @@ ignorable_epoll_accept_errno(int errnum) static int lxcControllerMain(int monitor, int client, int appPty, - int contPty) + int contPty, + pid_t container) { int rc = -1; int epollFd; @@ -446,7 +458,13 @@ static int 
lxcControllerMain(int monitor, ++numActive; } } else if (epollEvent.events & EPOLLHUP) { - VIR_DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd); + if (lxcPidGone(container)) + goto cleanup; + curFdOff = epollEvent.data.fd == appPty ? 0 : 1; + if (fdArray[curFdOff].active) { + fdArray[curFdOff].active = 0; + --numActive; + } continue; } else { lxcError(VIR_ERR_INTERNAL_ERROR, @@ -485,7 +503,9 @@ static int lxcControllerMain(int monitor, --numActive; fdArray[curFdOff].active = 0; } else if (-1 == rc) { - goto cleanup; + if (lxcPidGone(container)) + goto cleanup; + continue; } } @@ -564,7 +584,7 @@ lxcControllerRun(virDomainDefPtr def, int rc = -1; int control[2] = { -1, -1}; int containerPty = -1; - char *containerPtyPath; + char *containerPtyPath = NULL; pid_t container = -1; virDomainFSDefPtr root; char *devpts = NULL; @@ -683,7 +703,7 @@ lxcControllerRun(virDomainDefPtr def, if (lxcControllerClearCapabilities() < 0) goto cleanup; - rc = lxcControllerMain(monitor, client, appPty, containerPty); + rc = lxcControllerMain(monitor, client, appPty, containerPty, container); cleanup: VIR_FREE(devptmx); diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index 625777a..e47c201 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -953,36 +953,16 @@ cleanup: * @driver: pointer to driver structure * @vm: pointer to VM to clean up * - * waitpid() on the container process. kill and wait the tty process - * This is called by both lxcDomainDestroy and lxcSigHandler when a - * container exits. 
- * - * Returns 0 on success or -1 in case of error + * Cleanout resources associated with the now dead VM + * */ -static int lxcVmCleanup(lxc_driver_t *driver, +static void lxcVmCleanup(lxc_driver_t *driver, virDomainObjPtr vm) { - int rc = 0; - int waitRc; - int childStatus = -1; virCgroupPtr cgroup; int i; lxcDomainObjPrivatePtr priv = vm->privateData; - while (((waitRc = waitpid(vm->pid, &childStatus, 0)) == -1) && - errno == EINTR) - ; /* empty */ - - if ((waitRc != vm->pid) && (errno != ECHILD)) { - virReportSystemError(errno, - _("waitpid failed to wait for container %d: %d"), - vm->pid, waitRc); - rc = -1; - } else if (WIFEXITED(childStatus)) { - VIR_DEBUG("container exited with rc: %d", WEXITSTATUS(childStatus)); - rc = -1; - } - /* now that we know it's stopped call the hook if present */ if (virHookPresent(VIR_HOOK_DRIVER_LXC)) { char *xml = virDomainDefFormat(vm->def, 0); @@ -1022,8 +1002,6 @@ static int lxcVmCleanup(lxc_driver_t *driver, vm->def->id = -1; vm->newDef = NULL; } - - return rc; } /** @@ -1182,11 +1160,10 @@ error: static int lxcVmTerminate(lxc_driver_t *driver, - virDomainObjPtr vm, - int signum) + virDomainObjPtr vm) { - if (signum == 0) - signum = SIGINT; + virCgroupPtr group = NULL; + int rc; if (vm->pid <= 0) { lxcError(VIR_ERR_INTERNAL_ERROR, @@ -1194,18 +1171,30 @@ static int lxcVmTerminate(lxc_driver_t *driver, return -1; } - if (kill(vm->pid, signum) < 0) { - if (errno != ESRCH) { - virReportSystemError(errno, - _("Failed to kill pid %d"), - vm->pid); - return -1; - } - } + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) + return -1; + rc = virCgroupKillPainfully(group); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("Failed to kill container PIDs")); + rc = -1; + goto cleanup; + } + if (rc == 1) { + lxcError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Some container PIDs refused to die")); + rc = -1; + goto cleanup; + } + lxcVmCleanup(driver, vm); vm->state = VIR_DOMAIN_SHUTDOWN; - return lxcVmCleanup(driver, 
vm); + rc = 0; + +cleanup: + virCgroupFree(&group); + return rc; } static void lxcMonitorEvent(int watch, @@ -1229,7 +1218,7 @@ static void lxcMonitorEvent(int watch, goto cleanup; } - if (lxcVmTerminate(driver, vm, SIGINT) < 0) { + if (lxcVmTerminate(driver, vm) < 0) { virEventRemoveHandle(watch); } else { event = virDomainEventNewFromObj(vm, @@ -1544,7 +1533,7 @@ static int lxcVmStart(virConnectPtr conn, VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP, lxcMonitorEvent, vm, NULL)) < 0) { - lxcVmTerminate(driver, vm, 0); + lxcVmTerminate(driver, vm); goto cleanup; } @@ -1712,55 +1701,6 @@ cleanup: return dom; } -/** - * lxcDomainShutdown: - * @dom: pointer to domain to shutdown - * - * Sends SIGINT to container root process to request it to shutdown - * - * Returns 0 on success or -1 in case of error - */ -static int lxcDomainShutdown(virDomainPtr dom) -{ - lxc_driver_t *driver = dom->conn->privateData; - virDomainObjPtr vm; - virDomainEventPtr event = NULL; - int ret = -1; - - lxcDriverLock(driver); - vm = virDomainFindByUUID(&driver->domains, dom->uuid); - if (!vm) { - char uuidstr[VIR_UUID_STRING_BUFLEN]; - virUUIDFormat(dom->uuid, uuidstr); - lxcError(VIR_ERR_NO_DOMAIN, - _("No domain with matching uuid '%s'"), uuidstr); - goto cleanup; - } - - if (!virDomainObjIsActive(vm)) { - lxcError(VIR_ERR_OPERATION_INVALID, - "%s", _("Domain is not running")); - goto cleanup; - } - - ret = lxcVmTerminate(driver, vm, 0); - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_STOPPED, - VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN); - if (!vm->persistent) { - virDomainRemoveInactive(&driver->domains, vm); - vm = NULL; - } - -cleanup: - if (vm) - virDomainObjUnlock(vm); - if (event) - lxcDomainEventQueue(driver, event); - lxcDriverUnlock(driver); - return ret; -} - static int lxcDomainEventRegister(virConnectPtr conn, @@ -1928,7 +1868,7 @@ static int lxcDomainDestroy(virDomainPtr dom) goto cleanup; } - ret = lxcVmTerminate(driver, vm, SIGKILL); + ret = lxcVmTerminate(driver, vm); 
event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, VIR_DOMAIN_EVENT_STOPPED_DESTROYED); @@ -2057,7 +1997,7 @@ lxcReconnectVM(void *payload, const void *name ATTRIBUTE_UNUSED, void *opaque) VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP, lxcMonitorEvent, vm, NULL)) < 0) { - lxcVmTerminate(driver, vm, 0); + lxcVmTerminate(driver, vm); goto cleanup; } } else { @@ -2124,8 +2064,24 @@ static int lxcStartup(int privileged) rc = virCgroupForDriver("lxc", &lxc_driver->cgroup, privileged, 1); if (rc < 0) { char buf[1024]; - VIR_WARN("Unable to create cgroup for driver: %s", + VIR_WARN("Unable to create cgroup, disabling LXC driver: %s", virStrerror(-rc, buf, sizeof(buf))); + goto cleanup; + } + if (!virCgroupMounted(lxc_driver->cgroup, + VIR_CGROUP_CONTROLLER_CPUACCT)) { + VIR_WARN0("Unable to find 'cpuacct' cgroups controller mount"); + goto cleanup; + } + if (!virCgroupMounted(lxc_driver->cgroup, + VIR_CGROUP_CONTROLLER_DEVICES)) { + VIR_WARN0("Unable to find 'devices' cgroups controller mount"); + goto cleanup; + } + if (!virCgroupMounted(lxc_driver->cgroup, + VIR_CGROUP_CONTROLLER_MEMORY)) { + VIR_WARN0("Unable to find 'memory' cgroups controller mount"); + goto cleanup; } /* Call function to load lxc driver configuration information */ @@ -2845,7 +2801,7 @@ static virDriver lxcDriver = { lxcDomainLookupByName, /* domainLookupByName */ lxcDomainSuspend, /* domainSuspend */ lxcDomainResume, /* domainResume */ - lxcDomainShutdown, /* domainShutdown */ + NULL, /* domainShutdown */ NULL, /* domainReboot */ lxcDomainDestroy, /* domainDestroy */ lxcGetOSType, /* domainGetOSType */ -- 1.7.4 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list