Extend `lxcContainerStart` to support restoring a container from a file descriptor that refers to the directory holding the saved state of the LXC container. Signed-off-by: Radostin Stoyanov <rstoyanov1@xxxxxxxxx> --- src/lxc/lxc_container.c | 162 +++++++++++++++++++++++++++++++++++++++++++++-- src/lxc/lxc_container.h | 3 +- src/lxc/lxc_controller.c | 104 ++++++++++++++++++++++++++++-- src/lxc/lxc_driver.c | 4 +- src/lxc/lxc_process.c | 23 +++++-- src/lxc/lxc_process.h | 1 + 6 files changed, 280 insertions(+), 17 deletions(-) diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 532fd0be0..6cd203d7f 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -69,6 +69,8 @@ #include "virprocess.h" #include "virstring.h" +#include "lxc_criu.h" + #define VIR_FROM_THIS VIR_FROM_LXC VIR_LOG_INIT("lxc.lxc_container"); @@ -111,6 +113,7 @@ struct __lxc_child_argv { char **ttyPaths; int handshakefd; int *nsInheritFDs; + int restorefd; }; static int lxcContainerMountFSBlock(virDomainFSDefPtr fs, @@ -263,6 +266,7 @@ static virCommandPtr lxcContainerBuildInitCmd(virDomainDefPtr vmDef, * @ttyfd: FD of tty to set as the container console * @npassFDs: number of extra FDs * @passFDs: list of extra FDs + * @restorefd: FD of folder where container was dumped * * Setup file descriptors in the container. @ttyfd is set to be * the container's stdin, stdout & stderr. 
Any FDs included in @@ -272,7 +276,7 @@ static virCommandPtr lxcContainerBuildInitCmd(virDomainDefPtr vmDef, * Returns 0 on success or -1 in case of error */ static int lxcContainerSetupFDs(int *ttyfd, - size_t npassFDs, int *passFDs) + size_t npassFDs, int *passFDs, int restorefd) { int rc = -1; int open_max; @@ -368,6 +372,8 @@ static int lxcContainerSetupFDs(int *ttyfd, } for (fd = last_fd + 1; fd < open_max; fd++) { + if (fd == restorefd) + continue; int tmpfd = fd; VIR_MASS_CLOSE(tmpfd); } @@ -1083,6 +1089,31 @@ static int lxcContainerMountFSDev(virDomainDefPtr def, return ret; } +static int lxcContainerMountFSDevPTSRestore(virDomainDefPtr def, + const char *stateDir) +{ + int ret = -1; + char *path = NULL; + int flags = MS_MOVE; + + VIR_DEBUG("Mount /dev/pts stateDir=%s", stateDir); + + if (virAsprintf(&path, "%s/%s.devpts", stateDir, def->name) < 0) + return ret; + + VIR_DEBUG("Trying to move %s to /dev/pts", path); + + if (mount(path, "/dev/pts", NULL, flags, NULL) < 0) { + virReportSystemError(errno, _("Failed to mount %s on /dev/pts"), path); + goto cleanup; + } + + ret = 0; + cleanup: + VIR_FREE(path); + return ret; +} + static int lxcContainerMountFSDevPTS(virDomainDefPtr def, const char *stateDir) { @@ -2191,6 +2222,116 @@ static int lxcContainerSetHostname(virDomainDefPtr def) return ret; } +/* + * lxcContainerChildRestore: + * @data: pointer to container arguments + */ +static int lxcContainerChildRestore(void *data) +{ + lxc_child_argv_t *argv = data; + virDomainDefPtr vmDef = argv->config; + int ttyfd = -1; + int ret = -1; + char *ttyPath = NULL; + virDomainFSDefPtr root; + char *sec_mount_options = NULL; + char *stateDir = NULL; + + if (vmDef == NULL) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("lxcChild() passed invalid vm definition")); + goto cleanup; + } + + if (lxcContainerWaitForContinue(argv->monitor) < 0) { + virReportSystemError(errno, "%s", + _("Failed to read the container continue message")); + goto cleanup; + } + 
VIR_DEBUG("Received container continue message"); + + if (lxcContainerSetID(vmDef) < 0) + goto cleanup; + + root = virDomainGetFilesystemForTarget(vmDef, "/"); + + if (argv->nttyPaths) { + const char *tty = argv->ttyPaths[0]; + if (STRPREFIX(tty, "/dev/pts/")) + tty += strlen("/dev/pts/"); + if (virAsprintf(&ttyPath, "%s/%s.devpts/%s", + LXC_STATE_DIR, vmDef->name, tty) < 0) + goto cleanup; + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("At least one tty is required")); + goto cleanup; + } + + VIR_DEBUG("Container TTY path: %s", ttyPath); + + ttyfd = open(ttyPath, O_RDWR); + if (ttyfd < 0) { + virReportSystemError(errno, _("Failed to open tty %s"), ttyPath); + goto cleanup; + } + VIR_DEBUG("Container TTY fd: %d", ttyfd); + + if (!(sec_mount_options = virSecurityManagerGetMountOptions( + argv->securityDriver, + vmDef))) + goto cleanup; + + if (lxcContainerPrepareRoot(vmDef, root, sec_mount_options) < 0) + goto cleanup; + + if (lxcContainerSendContinue(argv->handshakefd) < 0) { + virReportSystemError(errno, "%s", + _("Failed to send continue signal to controller")); + goto cleanup; + } + + VIR_DEBUG("Setting up container's std streams"); + + if (lxcContainerSetupFDs(&ttyfd, argv->npassFDs, + argv->passFDs, argv->restorefd) < 0) + goto cleanup; + + if (virFileResolveAllLinks(LXC_STATE_DIR, &stateDir) < 0) + goto cleanup; + + /* Mounts /dev/pts */ + if (lxcContainerMountFSDevPTSRestore(vmDef, stateDir) < 0) { + virReportSystemError(errno, "%s", _("Failed to mount dev/pts")); + goto cleanup; + } + + if (setsid() < 0) + virReportSystemError(errno, "%s", _("Unable to become session leader")); + + VIR_DEBUG("Executing container restore criu function"); + ret = lxcCriuRestore(vmDef, argv->restorefd, 0); + + cleanup: + VIR_FORCE_CLOSE(argv->monitor); + VIR_FORCE_CLOSE(argv->handshakefd); + VIR_FORCE_CLOSE(ttyfd); + VIR_FREE(ttyPath); + VIR_FREE(stateDir); + VIR_FREE(sec_mount_options); + + if (ret != 0) { + VIR_DEBUG("Tearing down container"); + 
fprintf(stderr, + _("Failure in libvirt_lxc startup: %s\n"), + virGetLastErrorMessage()); + } + + return ret; +} + + + /** * lxcContainerChild: * @data: pointer to container arguments @@ -2322,7 +2463,7 @@ static int lxcContainerChild(void *data) VIR_FORCE_CLOSE(argv->handshakefd); VIR_FORCE_CLOSE(argv->monitor); if (lxcContainerSetupFDs(&ttyfd, - argv->npassFDs, argv->passFDs) < 0) + argv->npassFDs, argv->passFDs, -1) < 0) goto cleanup; /* Make init process of the container the leader of the new session. @@ -2403,6 +2544,7 @@ virArch lxcContainerGetAlt32bitArch(virArch arch) * @veths: interface names * @control: control FD to the container * @ttyPath: path of tty to set as the container console + * @restorefd: FD to folder where container was dumped * * Starts a container process by calling clone() with the namespace flags * @@ -2418,7 +2560,8 @@ int lxcContainerStart(virDomainDefPtr def, int handshakefd, int *nsInheritFDs, size_t nttyPaths, - char **ttyPaths) + char **ttyPaths, + int restorefd) { pid_t pid; int cflags; @@ -2436,6 +2579,7 @@ int lxcContainerStart(virDomainDefPtr def, .ttyPaths = ttyPaths, .handshakefd = handshakefd, .nsInheritFDs = nsInheritFDs, + .restorefd = restorefd, }; /* allocate a stack for the container */ @@ -2484,10 +2628,16 @@ int lxcContainerStart(virDomainDefPtr def, VIR_DEBUG("Inheriting a UTS namespace"); } - VIR_DEBUG("Cloning container init process"); - pid = clone(lxcContainerChild, stacktop, cflags, &args); + if (restorefd != -1) { + VIR_DEBUG("Cloning container process that will spawn criu restore"); + pid = clone(lxcContainerChildRestore, stacktop, SIGCHLD, &args); + } else { + VIR_DEBUG("Cloning container init process"); + pid = clone(lxcContainerChild, stacktop, cflags, &args); + VIR_DEBUG("clone() completed, new container PID is %d", pid); + } + VIR_FREE(stack); - VIR_DEBUG("clone() completed, new container PID is %d", pid); if (pid < 0) { virReportSystemError(errno, "%s", diff --git a/src/lxc/lxc_container.h 
b/src/lxc/lxc_container.h index 641e2d460..9a6ac2073 100644 --- a/src/lxc/lxc_container.h +++ b/src/lxc/lxc_container.h @@ -58,7 +58,8 @@ int lxcContainerStart(virDomainDefPtr def, int handshakefd, int *nsInheritFDs, size_t nttyPaths, - char **ttyPaths); + char **ttyPaths, + int restorefd); int lxcContainerSetupHostdevCapsMakePath(const char *dev); diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 507bffda0..a5eb5e336 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -146,6 +146,8 @@ struct _virLXCController { virCgroupPtr cgroup; virLXCFusePtr fuse; + + int restore; }; #include "lxc_controller_dispatch.h" @@ -1015,6 +1017,65 @@ static int lxcControllerClearCapabilities(void) return 0; } +static int +lxcControllerFindRestoredPid(int fd) +{ + int initpid = 0; + int ret = -1; + char *checkpointdir = NULL; + char *pidfile = NULL; + char *checkpointfd = NULL; + int pidfilefd; + char c; + + if (fd < 0) + goto cleanup; + + if (virAsprintf(&checkpointfd, "/proc/self/fd/%d", fd) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to write checkpoint dir path")); + goto cleanup; + } + + if (virFileResolveLink(checkpointfd, &checkpointdir) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to readlink checkpoint dir path")); + goto cleanup; + } + + if (virAsprintf(&pidfile, "%s/pidfile", checkpointdir) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to write pidfile path")); + goto cleanup; + } + + if ((pidfilefd = virFileOpenAs(pidfile, O_RDONLY, 0, -1, -1, 0)) < 0) { + virReportSystemError(pidfilefd, + _("Failed to open domain's pidfile '%s'"), + pidfile); + goto cleanup; + } + + while ((saferead(pidfilefd, &c, 1) == 1) && c != EOF) + initpid = initpid*10 + c - '0'; + + ret = initpid; + + if (virFileRemove(pidfile, -1, -1) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to delete pidfile path")); + } + + cleanup: + VIR_FORCE_CLOSE(fd); + VIR_FORCE_CLOSE(pidfilefd); + 
VIR_FREE(pidfile); + VIR_FREE(checkpointdir); + VIR_FREE(checkpointfd); + return ret; +} + + static bool wantReboot; static virMutex lock = VIR_MUTEX_INITIALIZER; @@ -2327,6 +2388,7 @@ virLXCControllerRun(virLXCControllerPtr ctrl) int containerhandshake[2] = { -1, -1 }; char **containerTTYPaths = NULL; size_t i; + bool restore_mode = (ctrl->restore != -1); if (VIR_ALLOC_N(containerTTYPaths, ctrl->nconsoles) < 0) goto cleanup; @@ -2383,7 +2445,8 @@ virLXCControllerRun(virLXCControllerPtr ctrl) containerhandshake[1], ctrl->nsFDs, ctrl->nconsoles, - containerTTYPaths)) < 0) + containerTTYPaths, + ctrl->restore)) < 0) goto cleanup; VIR_FORCE_CLOSE(control[1]); VIR_FORCE_CLOSE(containerhandshake[1]); @@ -2395,10 +2458,10 @@ virLXCControllerRun(virLXCControllerPtr ctrl) for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++) VIR_FORCE_CLOSE(ctrl->nsFDs[i]); - if (virLXCControllerSetupCgroupLimits(ctrl) < 0) + if (!restore_mode && virLXCControllerSetupCgroupLimits(ctrl) < 0) goto cleanup; - if (virLXCControllerSetupUserns(ctrl) < 0) + if (!restore_mode && virLXCControllerSetupUserns(ctrl) < 0) goto cleanup; if (virLXCControllerMoveInterfaces(ctrl) < 0) @@ -2423,6 +2486,26 @@ virLXCControllerRun(virLXCControllerPtr ctrl) if (lxcControllerClearCapabilities() < 0) goto cleanup; + if (restore_mode) { + int status; + int ret = waitpid(-1, &status, 0); + VIR_DEBUG("Got sig child %d", ret); + + /* There could be two cases here: + * 1. CRIU died because of a restore error and the container is not running + * 2. 
CRIU detached itself from the running container + */ + int initpid; + if ((initpid = lxcControllerFindRestoredPid(ctrl->restore)) < 0) { + virReportSystemError(errno, "%s", + _("Unable to get restored task pid")); + virNetDaemonQuit(ctrl->daemon); + goto cleanup; + } + + ctrl->initpid = initpid; + } + for (i = 0; i < ctrl->nconsoles; i++) if (virLXCControllerConsoleSetNonblocking(&(ctrl->consoles[i])) < 0) goto cleanup; @@ -2466,6 +2549,7 @@ int main(int argc, char *argv[]) int ns_fd[VIR_LXC_DOMAIN_NAMESPACE_LAST]; int handshakeFd = -1; bool bg = false; + int restore = -1; const struct option options[] = { { "background", 0, NULL, 'b' }, { "name", 1, NULL, 'n' }, @@ -2477,6 +2561,7 @@ int main(int argc, char *argv[]) { "share-net", 1, NULL, 'N' }, { "share-ipc", 1, NULL, 'I' }, { "share-uts", 1, NULL, 'U' }, + { "restore", 1, NULL, 'r' }, { "help", 0, NULL, 'h' }, { 0, 0, 0, 0 }, }; @@ -2504,7 +2589,7 @@ int main(int argc, char *argv[]) while (1) { int c; - c = getopt_long(argc, argv, "dn:v:p:m:c:s:h:S:N:I:U:", + c = getopt_long(argc, argv, "dn:v:p:m:c:s:h:S:N:I:U:r:", options, NULL); if (c == -1) @@ -2580,6 +2665,14 @@ int main(int argc, char *argv[]) securityDriver = optarg; break; + case 'r': + if (virStrToLong_i(optarg, NULL, 10, &restore) < 0) { + fprintf(stderr, "malformed --restore argument '%s'", + optarg); + goto cleanup; + } + break; + case 'h': case '?': fprintf(stderr, "\n"); @@ -2596,6 +2689,7 @@ int main(int argc, char *argv[]) fprintf(stderr, " -N FD, --share-net FD\n"); fprintf(stderr, " -I FD, --share-ipc FD\n"); fprintf(stderr, " -U FD, --share-uts FD\n"); + fprintf(stderr, " -r FD, --restore FD\n"); fprintf(stderr, " -h, --help\n"); fprintf(stderr, "\n"); rc = 0; @@ -2648,6 +2742,8 @@ int main(int argc, char *argv[]) ctrl->passFDs = passFDs; ctrl->npassFDs = npassFDs; + ctrl->restore = restore; + for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++) { if (ns_fd[i] != -1) { if (!ctrl->nsFDs) {/*allocate only once */ diff --git a/src/lxc/lxc_driver.c 
b/src/lxc/lxc_driver.c index 4f600f3df..f52085ebf 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -1125,7 +1125,7 @@ static int lxcDomainCreateWithFiles(virDomainPtr dom, ret = virLXCProcessStart(dom->conn, driver, vm, nfiles, files, - (flags & VIR_DOMAIN_START_AUTODESTROY), + (flags & VIR_DOMAIN_START_AUTODESTROY), -1, VIR_DOMAIN_RUNNING_BOOTED); if (ret == 0) { @@ -1252,7 +1252,7 @@ lxcDomainCreateXMLWithFiles(virConnectPtr conn, if (virLXCProcessStart(conn, driver, vm, nfiles, files, - (flags & VIR_DOMAIN_START_AUTODESTROY), + (flags & VIR_DOMAIN_START_AUTODESTROY), -1, VIR_DOMAIN_RUNNING_BOOTED) < 0) { virDomainAuditStart(vm, "booted", false); virLXCDomainObjEndJob(driver, vm); diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index 96041f2ec..1cd7f5bfe 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -118,7 +118,7 @@ virLXCProcessReboot(virLXCDriverPtr driver, virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN); vm->newDef = savedDef; if (virLXCProcessStart(conn, driver, vm, - 0, NULL, autodestroy, reason) < 0) { + 0, NULL, autodestroy, -1, reason) < 0) { VIR_WARN("Unable to handle reboot of vm %s", vm->def->name); goto cleanup; @@ -914,7 +914,8 @@ virLXCProcessBuildControllerCmd(virLXCDriverPtr driver, size_t nfiles, int handshakefd, int * const logfd, - const char *pidfile) + const char *pidfile, + int restorefd) { size_t i; char *filterstr; @@ -993,6 +994,12 @@ virLXCProcessBuildControllerCmd(virLXCDriverPtr driver, for (i = 0; i < nveths; i++) virCommandAddArgList(cmd, "--veth", veths[i], NULL); + if (restorefd != -1) { + virCommandAddArg(cmd, "--restore"); + virCommandAddArgFormat(cmd, "%d", restorefd); + virCommandPassFD(cmd, restorefd, 0); + } + virCommandPassFD(cmd, handshakefd, 0); virCommandDaemonize(cmd); virCommandSetPidFile(cmd, pidfile); @@ -1166,6 +1173,8 @@ virLXCProcessEnsureRootFS(virDomainObjPtr vm) * @driver: pointer to driver structure * @vm: pointer to virtual machine structure * 
@autoDestroy: mark the domain for auto destruction + * @restorefd: file descriptor pointing to the restore directory (-1 if not + * restoring) * @reason: reason for switching vm to running state * * Starts a vm @@ -1177,6 +1186,7 @@ int virLXCProcessStart(virConnectPtr conn, virDomainObjPtr vm, unsigned int nfiles, int *files, bool autoDestroy, + int restorefd, virDomainRunningReason reason) { int rc = -1, r; @@ -1386,7 +1396,7 @@ int virLXCProcessStart(virConnectPtr conn, files, nfiles, handshakefds[1], &logfd, - pidfile))) + pidfile, restorefd))) goto cleanup; /* now that we know it is about to start call the hook if present */ @@ -1494,6 +1504,9 @@ int virLXCProcessStart(virConnectPtr conn, if (!priv->machineName) goto cleanup; + if (restorefd != -1) + goto skip_cgroup_checks; + /* We know the cgroup must exist by this synchronization * point so lets detect that first, since it gives us a * more reliable way to kill everything off if something @@ -1510,6 +1523,8 @@ int virLXCProcessStart(virConnectPtr conn, goto cleanup; } + skip_cgroup_checks: + /* And we can get the first monitor connection now too */ if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm))) { /* Intentionally overwrite the real monitor error message, @@ -1596,7 +1611,7 @@ virLXCProcessAutostartDomain(virDomainObjPtr vm, if (vm->autostart && !virDomainObjIsActive(vm)) { ret = virLXCProcessStart(data->conn, data->driver, vm, - 0, NULL, false, + 0, NULL, false, -1, VIR_DOMAIN_RUNNING_BOOTED); virDomainAuditStart(vm, "booted", ret >= 0); if (ret < 0) { diff --git a/src/lxc/lxc_process.h b/src/lxc/lxc_process.h index d78cddef4..c724f31a7 100644 --- a/src/lxc/lxc_process.h +++ b/src/lxc/lxc_process.h @@ -29,6 +29,7 @@ int virLXCProcessStart(virConnectPtr conn, virDomainObjPtr vm, unsigned int nfiles, int *files, bool autoDestroy, + int restorefd, virDomainRunningReason reason); int virLXCProcessStop(virLXCDriverPtr driver, virDomainObjPtr vm, -- 2.14.3 -- libvir-list mailing list 
libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list