When doing LXC migration or simply restoring a container from a saved state, we need to restore the container from the CRIU image files that we have stored on disk. This patch extends lxcContainerStart into a more generic function that either starts a container from scratch or restores it from a snapshot. Signed-off-by: Katerina Koukiou <k.koukiou@xxxxxxxxx> --- src/Makefile.am | 3 +- src/lxc/lxc_container.c | 200 +++++++++++++++++++++++++++++++++++++++++++++-- src/lxc/lxc_container.h | 3 +- src/lxc/lxc_controller.c | 109 ++++++++++++++++++++++++-- src/lxc/lxc_driver.c | 4 +- src/lxc/lxc_process.c | 23 +++++- src/lxc/lxc_process.h | 1 + 7 files changed, 323 insertions(+), 20 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 64a7680..1542251 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -761,7 +761,8 @@ LXC_CONTROLLER_SOURCES = \ lxc/lxc_cgroup.c lxc/lxc_cgroup.h \ lxc/lxc_domain.c lxc/lxc_domain.h \ lxc/lxc_fuse.c lxc/lxc_fuse.h \ - lxc/lxc_controller.c + lxc/lxc_controller.c \ + lxc/lxc_criu.c lxc/lxc_criu.h SECURITY_DRIVER_APPARMOR_HELPER_SOURCES = \ $(DATATYPES_SOURCES) \ diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index b857431..7d307ee 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -70,6 +70,8 @@ #include "virprocess.h" #include "virstring.h" +#include "lxc_criu.h" + #define VIR_FROM_THIS VIR_FROM_LXC VIR_LOG_INIT("lxc.lxc_container"); @@ -112,6 +114,7 @@ struct __lxc_child_argv { char **ttyPaths; int handshakefd; int *nsInheritFDs; + int restorefd; }; static int lxcContainerMountFSBlock(virDomainFSDefPtr fs, @@ -266,7 +269,7 @@ static virCommandPtr lxcContainerBuildInitCmd(virDomainDefPtr vmDef, * Returns 0 on success or -1 in case of error */ static int lxcContainerSetupFDs(int *ttyfd, - size_t npassFDs, int *passFDs) + size_t npassFDs, int *passFDs, int restorefd) { int rc = -1; int open_max; @@ -362,6 +365,8 @@ static int lxcContainerSetupFDs(int *ttyfd, } for (fd = last_fd + 1; fd < 
open_max; fd++) { + if (fd == restorefd) + continue; int tmpfd = fd; VIR_MASS_CLOSE(tmpfd); } @@ -1077,6 +1082,36 @@ static int lxcContainerMountFSDev(virDomainDefPtr def, return ret; } + +static int lxcContainerMountFSDevPTSRestore(virDomainDefPtr def, + const char *stateDir) +{ + int ret = -1; + char *path = NULL; + int flags = MS_MOVE; + + VIR_DEBUG("Mount /dev/pts stateDir=%s", stateDir); + + if (virAsprintf(&path, "%s/%s.devpts", + stateDir, def->name) < 0) + return ret; + + VIR_DEBUG("Trying to move %s to /dev/pts", path); + + if (mount(path, "/dev/pts", NULL, flags, NULL) < 0) { + virReportSystemError(errno, + _("Failed to mount %s on /dev/pts"), + path); + goto cleanup; + } + + ret = 0; + cleanup: + VIR_FREE(path); + return ret; +} + + static int lxcContainerMountFSDevPTS(virDomainDefPtr def, const char *stateDir) { @@ -2120,6 +2155,148 @@ static int lxcAttachNS(int *ns_fd) } +/* + * lxcContainerChildRestore: + * @data: pointer to container arguments + */ +static int lxcContainerChildRestore(void *data) +{ + lxc_child_argv_t *argv = data; + virDomainDefPtr vmDef = argv->config; + int ttyfd = -1; + int ret = -1; + char *ttyPath = NULL; + virDomainFSDefPtr root; + char *sec_mount_options = NULL; + char *stateDir = NULL; + char *rootfs_mount = NULL; + + if (NULL == vmDef) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("lxcChild() passed invalid vm definition")); + goto cleanup; + } + + if (lxcContainerWaitForContinue(argv->monitor) < 0) { + virReportSystemError(errno, "%s", + _("Failed to read the container continue message")); + goto cleanup; + } + VIR_DEBUG("Received container continue message"); + + if (lxcContainerSetID(vmDef) < 0) + goto cleanup; + + root = virDomainGetFilesystemForTarget(vmDef, "/"); + + if (argv->nttyPaths) { + const char *tty = argv->ttyPaths[0]; + if (STRPREFIX(tty, "/dev/pts/")) + tty += strlen("/dev/pts/"); + if (virAsprintf(&ttyPath, "%s/%s.devpts/%s", + LXC_STATE_DIR, vmDef->name, tty) < 0) + goto cleanup; + } else { + 
virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("At least one tty is required")); + goto cleanup; + } + + VIR_DEBUG("Container TTY path: %s", ttyPath); + + ttyfd = open(ttyPath, O_RDWR); + if (ttyfd < 0) { + virReportSystemError(errno, + _("Failed to open tty %s"), + ttyPath); + goto cleanup; + } + VIR_DEBUG("Container TTY fd: %d", ttyfd); + + if (!(sec_mount_options = virSecurityManagerGetMountOptions( + argv->securityDriver, + vmDef))) + goto cleanup; + + if (lxcContainerPrepareRoot(vmDef, root, sec_mount_options) < 0) + goto cleanup; + + if (lxcContainerSendContinue(argv->handshakefd) < 0) { + virReportSystemError(errno, "%s", + _("Failed to send continue signal to controller")); + goto cleanup; + } + + VIR_DEBUG("Setting up container's std streams"); + + if (lxcContainerSetupFDs(&ttyfd, + argv->npassFDs, argv->passFDs, argv->restorefd) < 0) + goto cleanup; + + /* CRIU needs the container's root bind mounted so that it is the root of + * some mount. + */ + if (virAsprintf(&rootfs_mount, "/tmp/%s", vmDef->name) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to write rootfs dir mount path")); + goto cleanup; + } + + if (virFileMakePath(rootfs_mount) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to mkdir rootfs mount path")); + goto cleanup; + } + + if (mount(root->src, rootfs_mount, NULL, MS_BIND, NULL) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to create rootfs mountpoint")); + goto cleanup; + } + + if (virFileResolveAllLinks(LXC_STATE_DIR, &stateDir) < 0) + goto cleanup; + + /* Mounts /dev/pts */ + if (lxcContainerMountFSDevPTSRestore(vmDef, stateDir) < 0) { + virReportSystemError(errno, "%s", + _("Failed to mount dev/pts")); + goto cleanup; + } + + if (setsid() < 0) { + virReportSystemError(errno, "%s", + _("Unable to become session leader")); + } + + ret = 0; + + cleanup: + VIR_FORCE_CLOSE(argv->monitor); + VIR_FORCE_CLOSE(argv->handshakefd); + VIR_FORCE_CLOSE(ttyfd); + VIR_FREE(ttyPath); + 
VIR_FREE(rootfs_mount); + VIR_FREE(stateDir); + VIR_FREE(sec_mount_options); + + if (ret == 0) { + VIR_DEBUG("Executing container restore criu function"); + ret = lxcCriuRestore(vmDef, argv->restorefd, 0); + } + + if (ret != 0) { + VIR_DEBUG("Tearing down container"); + fprintf(stderr, + _("Failure in libvirt_lxc startup: %s\n"), + virGetLastErrorMessage()); + } + + return ret; +} + + + /** * lxcContainerChild: * @data: pointer to container arguments @@ -2242,7 +2419,7 @@ static int lxcContainerChild(void *data) VIR_FORCE_CLOSE(argv->handshakefd); VIR_FORCE_CLOSE(argv->monitor); if (lxcContainerSetupFDs(&ttyfd, - argv->npassFDs, argv->passFDs) < 0) + argv->npassFDs, argv->passFDs, -1) < 0) goto cleanup; /* Make init process of the container the leader of the new session. @@ -2332,7 +2509,8 @@ int lxcContainerStart(virDomainDefPtr def, int handshakefd, int *nsInheritFDs, size_t nttyPaths, - char **ttyPaths) + char **ttyPaths, + int restorefd) { pid_t pid; int cflags; @@ -2350,6 +2528,7 @@ int lxcContainerStart(virDomainDefPtr def, .ttyPaths = ttyPaths, .handshakefd = handshakefd, .nsInheritFDs = nsInheritFDs, + .restorefd = restorefd, }; /* allocate a stack for the container */ @@ -2399,10 +2578,19 @@ int lxcContainerStart(virDomainDefPtr def, VIR_DEBUG("Inheriting a UTS namespace"); } - VIR_DEBUG("Cloning container init process"); - pid = clone(lxcContainerChild, stacktop, cflags, &args); + if (restorefd == -1) + VIR_DEBUG("Cloning container init process"); + else + VIR_DEBUG("Cloning container process that will spawn criu restore"); + + if (restorefd != -1) + pid = clone(lxcContainerChildRestore, stacktop, SIGCHLD, &args); + else + pid = clone(lxcContainerChild, stacktop, cflags, &args); + VIR_FREE(stack); - VIR_DEBUG("clone() completed, new container PID is %d", pid); + if (restorefd == -1) + VIR_DEBUG("clone() completed, new container PID is %d", pid); if (pid < 0) { virReportSystemError(errno, "%s", diff --git a/src/lxc/lxc_container.h b/src/lxc/lxc_container.h 
index 33eaab4..5d47071 100644 --- a/src/lxc/lxc_container.h +++ b/src/lxc/lxc_container.h @@ -63,7 +63,8 @@ int lxcContainerStart(virDomainDefPtr def, int handshakefd, int *nsInheritFDs, size_t nttyPaths, - char **ttyPaths); + char **ttyPaths, + int restorefd); int lxcContainerAvailable(int features); diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index e58ff1b..e178195 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -146,6 +146,8 @@ struct _virLXCController { virCgroupPtr cgroup; virLXCFusePtr fuse; + + int restore; }; #include "lxc_controller_dispatch.h" @@ -1009,6 +1011,64 @@ static int lxcControllerClearCapabilities(void) return 0; } +static int +lxcControllerFindRestoredPid(int fd) +{ + int initpid = 0; + int ret = -1; + char *checkpointdir = NULL; + char *pidfile = NULL; + char *checkpointfd = NULL; + int pidfilefd; + char c; + + if (fd < 0) + goto cleanup; + + if (virAsprintf(&checkpointfd, "/proc/self/fd/%d", fd) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to write checkpoint dir path")); + goto cleanup; + } + + if (virFileResolveLink(checkpointfd, &checkpointdir) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to readlink checkpoint dir path")); + goto cleanup; + } + + if (virAsprintf(&pidfile, "%s/pidfile", checkpointdir) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to write pidfile path")); + goto cleanup; + } + + if ((pidfilefd = virFileOpenAs(pidfile, O_RDONLY, 0, -1, -1, 0)) < 0) { + virReportSystemError(pidfilefd, + _("Failed to open domain's pidfile '%s'"), + pidfile); + goto cleanup; + } + + while ((saferead(pidfilefd, &c, 1) == 1) && c != EOF) + initpid = initpid*10 + c - '0'; + + ret = initpid; + + if (virFileRemove(pidfile, -1, -1) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to delete pidfile path")); + } + + cleanup: + VIR_FORCE_CLOSE(fd); + VIR_FORCE_CLOSE(pidfilefd); + VIR_FREE(pidfile); + VIR_FREE(checkpointdir); + 
VIR_FREE(checkpointfd); + return ret; +} + static bool wantReboot; static virMutex lock = VIR_MUTEX_INITIALIZER; @@ -2348,6 +2408,7 @@ virLXCControllerRun(virLXCControllerPtr ctrl) int containerhandshake[2] = { -1, -1 }; char **containerTTYPaths = NULL; size_t i; + bool restore_mode = (ctrl->restore != -1); if (VIR_ALLOC_N(containerTTYPaths, ctrl->nconsoles) < 0) goto cleanup; @@ -2404,8 +2465,10 @@ virLXCControllerRun(virLXCControllerPtr ctrl) containerhandshake[1], ctrl->nsFDs, ctrl->nconsoles, - containerTTYPaths)) < 0) + containerTTYPaths, + ctrl->restore)) < 0) goto cleanup; + VIR_FORCE_CLOSE(control[1]); VIR_FORCE_CLOSE(containerhandshake[1]); @@ -2416,10 +2479,10 @@ virLXCControllerRun(virLXCControllerPtr ctrl) for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++) VIR_FORCE_CLOSE(ctrl->nsFDs[i]); - if (virLXCControllerSetupCgroupLimits(ctrl) < 0) + if (!restore_mode && virLXCControllerSetupCgroupLimits(ctrl) < 0) goto cleanup; - if (virLXCControllerSetupUserns(ctrl) < 0) + if (!restore_mode && virLXCControllerSetupUserns(ctrl) < 0) goto cleanup; if (virLXCControllerMoveInterfaces(ctrl) < 0) @@ -2444,13 +2507,33 @@ virLXCControllerRun(virLXCControllerPtr ctrl) if (lxcControllerClearCapabilities() < 0) goto cleanup; - if (virLXCControllerDaemonHandshake(ctrl) < 0) - goto cleanup; + if (restore_mode) { + int status; + int ret = waitpid(-1, &status, 0); + VIR_DEBUG("Got sig child %d", ret); + + /* We have two basic cases here. 
+ * - CRIU died bacause of restore error and we do not have a running container + * - CRIU detached itself from the running container + */ + int initpid; + if ((initpid = lxcControllerFindRestoredPid(ctrl->restore)) < 0) { + virReportSystemError(errno, "%s", + _("Unable to get restored task pid")); + virNetDaemonQuit(ctrl->daemon); + goto cleanup; + } else { + ctrl->initpid = initpid; + } + } for (i = 0; i < ctrl->nconsoles; i++) if (virLXCControllerConsoleSetNonblocking(&(ctrl->consoles[i])) < 0) goto cleanup; + if (virLXCControllerDaemonHandshake(ctrl) < 0) + goto cleanup; + /* We must not hold open a dbus connection for life * of LXC instance, since dbus-daemon is limited to * only a few 100 connections by default @@ -2487,6 +2570,8 @@ int main(int argc, char *argv[]) int ns_fd[VIR_LXC_DOMAIN_NAMESPACE_LAST]; int handshakeFd = -1; bool bg = false; + int restore = -1; + const struct option options[] = { { "background", 0, NULL, 'b' }, { "name", 1, NULL, 'n' }, @@ -2498,6 +2583,7 @@ int main(int argc, char *argv[]) { "share-net", 1, NULL, 'N' }, { "share-ipc", 1, NULL, 'I' }, { "share-uts", 1, NULL, 'U' }, + { "restore", 1, NULL, 'r' }, { "help", 0, NULL, 'h' }, { 0, 0, 0, 0 }, }; @@ -2525,7 +2611,7 @@ int main(int argc, char *argv[]) while (1) { int c; - c = getopt_long(argc, argv, "dn:v:p:m:c:s:h:S:N:I:U:", + c = getopt_long(argc, argv, "dn:v:p:m:c:s:h:S:N:I:U:r:", options, NULL); if (c == -1) @@ -2601,6 +2687,14 @@ int main(int argc, char *argv[]) securityDriver = optarg; break; + case 'r': + if (virStrToLong_i(optarg, NULL, 10, &restore) < 0) { + fprintf(stderr, "malformed --restore argument '%s'", + optarg); + goto cleanup; + } + break; + case 'h': case '?': fprintf(stderr, "\n"); @@ -2617,6 +2711,7 @@ int main(int argc, char *argv[]) fprintf(stderr, " -N FD, --share-net FD\n"); fprintf(stderr, " -I FD, --share-ipc FD\n"); fprintf(stderr, " -U FD, --share-uts FD\n"); + fprintf(stderr, " -r FD, --restore FD\n"); fprintf(stderr, " -h, --help\n"); 
fprintf(stderr, "\n"); rc = 0; @@ -2669,6 +2764,8 @@ int main(int argc, char *argv[]) ctrl->passFDs = passFDs; ctrl->npassFDs = npassFDs; + ctrl->restore = restore; + for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++) { if (ns_fd[i] != -1) { if (!ctrl->nsFDs) {/*allocate only once */ diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index 46af05d..bd47c91 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -1133,7 +1133,7 @@ static int lxcDomainCreateWithFiles(virDomainPtr dom, ret = virLXCProcessStart(dom->conn, driver, vm, nfiles, files, - (flags & VIR_DOMAIN_START_AUTODESTROY), + (flags & VIR_DOMAIN_START_AUTODESTROY), -1, VIR_DOMAIN_RUNNING_BOOTED); if (ret == 0) { @@ -1259,7 +1259,7 @@ lxcDomainCreateXMLWithFiles(virConnectPtr conn, if (virLXCProcessStart(conn, driver, vm, nfiles, files, - (flags & VIR_DOMAIN_START_AUTODESTROY), + (flags & VIR_DOMAIN_START_AUTODESTROY), -1, VIR_DOMAIN_RUNNING_BOOTED) < 0) { virDomainAuditStart(vm, "booted", false); if (!vm->persistent) { diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index 28313f0..b4f92e0 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -123,7 +123,7 @@ virLXCProcessReboot(virLXCDriverPtr driver, virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN); vm->newDef = savedDef; if (virLXCProcessStart(conn, driver, vm, - 0, NULL, autodestroy, reason) < 0) { + 0, NULL, autodestroy, -1, reason) < 0) { VIR_WARN("Unable to handle reboot of vm %s", vm->def->name); goto cleanup; @@ -929,7 +929,8 @@ virLXCProcessBuildControllerCmd(virLXCDriverPtr driver, size_t nfiles, int handshakefd, int * const logfd, - const char *pidfile) + const char *pidfile, + int restorefd) { size_t i; char *filterstr; @@ -1008,6 +1009,12 @@ virLXCProcessBuildControllerCmd(virLXCDriverPtr driver, for (i = 0; i < nveths; i++) virCommandAddArgList(cmd, "--veth", veths[i], NULL); + if (restorefd != -1) { + virCommandAddArg(cmd, "--restore"); + virCommandAddArgFormat(cmd, "%d", restorefd); + 
virCommandPassFD(cmd, restorefd, 0); + } + virCommandPassFD(cmd, handshakefd, 0); virCommandDaemonize(cmd); virCommandSetPidFile(cmd, pidfile); @@ -1181,6 +1188,8 @@ virLXCProcessEnsureRootFS(virDomainObjPtr vm) * @driver: pointer to driver structure * @vm: pointer to virtual machine structure * @autoDestroy: mark the domain for auto destruction + * @restorefd: file descriptor pointing to the restore directory (-1 if not + * restoring) * @reason: reason for switching vm to running state * * Starts a vm @@ -1192,6 +1201,7 @@ int virLXCProcessStart(virConnectPtr conn, virDomainObjPtr vm, unsigned int nfiles, int *files, bool autoDestroy, + int restorefd, virDomainRunningReason reason) { int rc = -1, r; @@ -1406,7 +1416,7 @@ int virLXCProcessStart(virConnectPtr conn, files, nfiles, handshakefds[1], &logfd, - pidfile))) + pidfile, restorefd))) goto cleanup; /* now that we know it is about to start call the hook if present */ @@ -1511,6 +1521,9 @@ int virLXCProcessStart(virConnectPtr conn, goto cleanup; } + if (restorefd != -1) + goto skip_cgroup_checks; + /* We know the cgroup must exist by this synchronization * point so lets detect that first, since it gives us a * more reliable way to kill everything off if something @@ -1527,6 +1540,8 @@ int virLXCProcessStart(virConnectPtr conn, goto cleanup; } + skip_cgroup_checks: + /* Get the machine name so we can properly delete it through * systemd later */ if (!(priv->machineName = virSystemdGetMachineNameByPID(vm->pid))) @@ -1634,7 +1649,7 @@ virLXCProcessAutostartDomain(virDomainObjPtr vm, if (vm->autostart && !virDomainObjIsActive(vm)) { ret = virLXCProcessStart(data->conn, data->driver, vm, - 0, NULL, false, + 0, NULL, false, -1, VIR_DOMAIN_RUNNING_BOOTED); virDomainAuditStart(vm, "booted", ret >= 0); if (ret < 0) { diff --git a/src/lxc/lxc_process.h b/src/lxc/lxc_process.h index d78cdde..c724f31 100644 --- a/src/lxc/lxc_process.h +++ b/src/lxc/lxc_process.h @@ -29,6 +29,7 @@ int virLXCProcessStart(virConnectPtr conn, 
virDomainObjPtr vm, unsigned int nfiles, int *files, bool autoDestroy, + int restorefd, virDomainRunningReason reason); int virLXCProcessStop(virLXCDriverPtr driver, virDomainObjPtr vm, -- 2.7.3 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list