This patch adds helper functions in lxc_criu.{h,c} to support LXC
checkpoint/restore using the CRIU binary.

To save the container state, LXC follows the same pattern as QEMU and
libxl: a single file that starts with a metadata header. Since CRIU
produces multiple image files, they have to be bundled into a single
file by an archiver, optionally compressed. tar is used for that.

Signed-off-by: Julio Faracco <jcfaracco@xxxxxxxxx>
---
 src/lxc/lxc_criu.c  | 474 ++++++++++++++++++++++++++++++++++++++++++++
 src/lxc/lxc_criu.h  |  53 +++++
 src/lxc/meson.build |   2 +
 3 files changed, 529 insertions(+)
 create mode 100644 src/lxc/lxc_criu.c
 create mode 100644 src/lxc/lxc_criu.h

diff --git a/src/lxc/lxc_criu.c b/src/lxc/lxc_criu.c
new file mode 100644
index 0000000000..a82bd5ffde
--- /dev/null
+++ b/src/lxc/lxc_criu.c
@@ -0,0 +1,474 @@
+/*
+ * lxc_criu.c: wrapper functions for CRIU C API to be used for lxc migration
+ *
+ * Copyright (c) 2021 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+
+#include "virobject.h"
+#include "virerror.h"
+#include "virlog.h"
+#include "virfile.h"
+#include "vircommand.h"
+#include "virstring.h"
+#include "viralloc.h"
+#include "virutil.h"
+
+#include "lxc_domain.h"
+#include "lxc_driver.h"
+#include "lxc_criu.h"
+
+#define VIR_FROM_THIS VIR_FROM_LXC
+
+VIR_LOG_INIT("lxc.lxc_criu");
+
+#if WITH_CRIU
+typedef enum {
+    LXC_SAVE_FORMAT_RAW = 0,
+    LXC_SAVE_FORMAT_GZIP = 1,
+    LXC_SAVE_FORMAT_BZIP2 = 2,
+    LXC_SAVE_FORMAT_XZ = 3,
+    LXC_SAVE_FORMAT_LZOP = 4,
+
+    LXC_SAVE_FORMAT_LAST
+} virLXCSaveFormat;
+
+VIR_ENUM_DECL(lxcSaveCompression);
+VIR_ENUM_IMPL(lxcSaveCompression,
+              LXC_SAVE_FORMAT_LAST,
+              "raw",
+              "gzip",
+              "bzip2",
+              "xz",
+              "lzop",
+);
+
+
+/**
+ * lxcSaveImageGetCompressionProgram:
+ * @imageFormat: name of the compression format ("raw", "gzip", "bzip2",
+ *               "xz" or "lzop"), or NULL for an uncompressed archive
+ * @compressor: set to a newly allocated tar command on success (owned by
+ *              the caller) and to NULL on failure
+ * @styleFormat: "save" to create the archive, "dump" to extract it
+ *
+ * Prepares the tar invocation used to pack or unpack the CRIU image files.
+ * The caller still has to add the archive file and directory arguments.
+ *
+ * Returns the virLXCSaveFormat value matching @imageFormat
+ * (LXC_SAVE_FORMAT_RAW if @imageFormat is NULL), or -1 on error.
+ */
+static int
+lxcSaveImageGetCompressionProgram(const char *imageFormat,
+                                  virCommandPtr *compressor,
+                                  const char *styleFormat)
+{
+    g_autofree char *prog = NULL;
+    g_autoptr(virCommand) cmd = NULL;
+    int format = LXC_SAVE_FORMAT_RAW;
+
+    *compressor = NULL;
+
+    /* Use tar to bundle all .img files into a single archive */
+    if (!(prog = virFindFileInPath("tar"))) {
+        virReportError(VIR_ERR_OPERATION_FAILED, "%s",
+                       _("Unable to find 'tar' binary in PATH"));
+        return -1;
+    }
+
+    cmd = virCommandNew(prog);
+
+    if (STREQ(styleFormat, "save")) {
+        /* Remove the files once they were added to the archive */
+        virCommandAddArgList(cmd, "--create", "--remove-files", NULL);
+    } else if (STREQ(styleFormat, "dump")) {
+        virCommandAddArg(cmd, "--extract");
+    } else {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Unknown archive operation '%s'"), styleFormat);
+        return -1;
+    }
+
+    if (imageFormat &&
+        (format = lxcSaveCompressionTypeFromString(imageFormat)) < 0) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                       _("Unknown compression format '%s'"), imageFormat);
+        return -1;
+    }
+
+    switch (format) {
+    case LXC_SAVE_FORMAT_GZIP:
+        virCommandAddArg(cmd, "--gzip");
+        break;
+    case LXC_SAVE_FORMAT_BZIP2:
+        virCommandAddArg(cmd, "--bzip2");
+        break;
+    case LXC_SAVE_FORMAT_XZ:
+        virCommandAddArg(cmd, "--xz");
+        break;
+    case LXC_SAVE_FORMAT_LZOP:
+        virCommandAddArg(cmd, "--lzop");
+        break;
+    case LXC_SAVE_FORMAT_RAW:
+    default:
+        break;
+    }
+
+    *compressor = g_steal_pointer(&cmd);
+    return format;
+}
+
+
+/**
+ * lxcCriuCompress:
+ * @checkpointdir: directory containing the "save/" directory of CRIU images
+ * @compressionType: compression format name, or NULL for a plain archive
+ *
+ * Packs the content of @checkpointdir/save/ into @checkpointdir/criu.save
+ * and removes the image files that were added to the archive.
+ *
+ * Returns the numeric compression format that was used, or -1 on failure.
+ */
+int lxcCriuCompress(const char *checkpointdir,
+                    const char *compressionType)
+{
+    g_autoptr(virCommand) cmd = NULL;
+    g_autofree char *tarfile = NULL;
+    int ret;
+
+    if ((ret = lxcSaveImageGetCompressionProgram(compressionType,
+                                                 &cmd,
+                                                 "save")) < 0)
+        return -1;
+
+    tarfile = g_strdup_printf("%s/criu.save", checkpointdir);
+
+    virCommandAddArgFormat(cmd, "--file=%s", tarfile);
+    virCommandAddArgFormat(cmd, "--directory=%s/save/", checkpointdir);
+    virCommandAddArg(cmd, ".");
+
+    if (virCommandRun(cmd, NULL) < 0)
+        return -1;
+
+    return ret;
+}
+
+
+/**
+ * lxcCriuDecompress:
+ * @checkpointdir: directory containing the "criu.save" archive
+ * @compressionType: compression format name, or NULL for a plain archive
+ *
+ * Creates @checkpointdir/save/ if needed and extracts
+ * @checkpointdir/criu.save into it.
+ *
+ * Returns the numeric compression format that was used, or -1 on failure.
+ */
+int lxcCriuDecompress(const char *checkpointdir,
+                      const char *compressionType)
+{
+    g_autoptr(virCommand) cmd = NULL;
+    g_autofree char *tarfile = NULL;
+    g_autofree char *savedir = NULL;
+    int ret;
+
+    if ((ret = lxcSaveImageGetCompressionProgram(compressionType,
+                                                 &cmd,
+                                                 "dump")) < 0)
+        return -1;
+
+    savedir = g_strdup_printf("%s/save/", checkpointdir);
+    if (virFileMakePath(savedir) < 0) {
+        virReportSystemError(errno,
+                             _("Failed to mkdir %s"), savedir);
+        return -1;
+    }
+
+    tarfile = g_strdup_printf("%s/criu.save", checkpointdir);
+
+    virCommandAddArgFormat(cmd, "--file=%s", tarfile);
+    virCommandAddArgFormat(cmd, "--directory=%s", savedir);
+
+    if (virCommandRun(cmd, NULL) < 0)
+        return -1;
+
+    return ret;
+}
+
+
+/**
+ * lxcCriuDump:
+ * @vm: domain object of the container to checkpoint
+ * @checkpointdir: directory below which the "save/" image directory is created
+ *
+ * Checkpoints the process tree rooted at the container's init process by
+ * running "criu dump". The init pid ("dump.pid") and the description of the
+ * external tty ("tty.info") are stored next to the CRIU images.
+ *
+ * Returns -1 if the dump could not be prepared or criu could not be run,
+ * otherwise the raw status of the criu process as set by virCommandRun().
+ */
+int lxcCriuDump(virDomainObjPtr vm,
+                const char *checkpointdir)
+{
+    virLXCDomainObjPrivatePtr priv = vm->privateData;
+    g_autoptr(virCommand) cmd = NULL;
+    struct stat sb;
+    g_autofree char *path = NULL;
+    g_autofree char *tty_info_path = NULL;
+    g_autofree char *ttyinfo = NULL;
+    g_autofree char *pidfile = NULL;
+    g_autofree char *pidbuf = NULL;
+    g_autofree char *savedir = NULL;
+    int pidlen;
+    int pidfd;
+    int status;
+
+    savedir = g_strdup_printf("%s/save/", checkpointdir);
+    if (virFileMakePath(savedir) < 0) {
+        virReportSystemError(errno,
+                             _("Failed to mkdir %s"), savedir);
+        return -1;
+    }
+
+    pidfile = g_strdup_printf("%s/save/dump.pid", checkpointdir);
+    pidbuf = g_strdup_printf("%d", priv->initpid);
+    pidlen = strlen(pidbuf);
+
+    if ((pidfd = open(pidfile, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0) {
+        virReportSystemError(errno,
+                             _("Unable to create %s"), pidfile);
+        return -1;
+    }
+
+    if (safewrite(pidfd, pidbuf, pidlen) != pidlen) {
+        virReportSystemError(errno, "%s", _("criu pid file write failed"));
+        VIR_FORCE_CLOSE(pidfd);
+        return -1;
+    }
+
+    if (VIR_CLOSE(pidfd) < 0) {
+        virReportSystemError(errno,
+                             _("Unable to close %s"), pidfile);
+        return -1;
+    }
+
+    cmd = virCommandNew(CRIU);
+    virCommandAddArg(cmd, "dump");
+
+    virCommandAddArgList(cmd, "--images-dir", savedir, NULL);
+
+    virCommandAddArgList(cmd, "--log-file", "dump.log", NULL);
+
+    virCommandAddArgList(cmd, "-vvvv", NULL);
+
+    virCommandAddArg(cmd, "--tree");
+    virCommandAddArgFormat(cmd, "%d", priv->initpid);
+
+    virCommandAddArgList(cmd, "--tcp-established", "--file-locks",
+                         "--link-remap", "--force-irmap", NULL);
+
+    virCommandAddArgList(cmd, "--manage-cgroup", NULL);
+
+    virCommandAddArgList(cmd, "--enable-external-sharing",
+                         "--enable-external-masters", NULL);
+
+    virCommandAddArgList(cmd, "--enable-fs", "hugetlbfs",
+                         "--enable-fs", "tracefs", NULL);
+
+    /* Add support for FUSE */
+    virCommandAddArgList(cmd, "--ext-mount-map", "/proc/meminfo:fuse", NULL);
+    virCommandAddArgList(cmd, "--ghost-limit", "10000000", NULL);
+
+    virCommandAddArgList(cmd, "--ext-mount-map", "/dev/console:console", NULL);
+    virCommandAddArgList(cmd, "--ext-mount-map", "/dev/tty1:tty1", NULL);
+    virCommandAddArgList(cmd, "--ext-mount-map", "auto", NULL);
+
+    /* The master pair of the /dev/pts device lives outside from what is dumped
+     * inside the libvirt-lxc process. Add the slave pair as an external tty
+     * otherwise criu will fail.
+     */
+    path = g_strdup_printf("/proc/%d/root/dev/pts/0", priv->initpid);
+
+    if (stat(path, &sb) < 0) {
+        virReportSystemError(errno,
+                             _("Unable to stat %s"), path);
+        return -1;
+    }
+
+    /* The external tty is described as tty[rdev:dev], both in hex */
+    tty_info_path = g_strdup_printf("%s/tty.info", savedir);
+    ttyinfo = g_strdup_printf("tty[%x:%x]", (unsigned int)sb.st_rdev,
+                              (unsigned int)sb.st_dev);
+
+    if (virFileWriteStr(tty_info_path, ttyinfo, 0666) < 0) {
+        virReportSystemError(errno,
+                             _("Failed to write tty info to %s"),
+                             tty_info_path);
+        return -1;
+    }
+
+    VIR_DEBUG("tty.info: %s", ttyinfo);
+    virCommandAddArgList(cmd, "--external", ttyinfo, NULL);
+
+    VIR_DEBUG("About to checkpoint domain %s (pid = %d)",
+              vm->def->name, priv->initpid);
+    virCommandRawStatus(cmd);
+    if (virCommandRun(cmd, &status) < 0)
+        return -1;
+
+    return status;
+}
+
+
+/**
+ * lxcCriuRestore:
+ * @def: definition of the domain being restored
+ * @restorefd: file descriptor referring to the directory with the CRIU images
+ * @ttyfd: file descriptor passed to criu in place of the saved external tty
+ *
+ * Replaces the current process with "criu restore". The tty description
+ * stored in "tty.info" is mapped onto @ttyfd through --inherit-fd.
+ *
+ * This function only returns on failure: -1 if the command could not be
+ * prepared, otherwise the result of virCommandExec().
+ */
+int lxcCriuRestore(virDomainDefPtr def,
+                   int restorefd, int ttyfd)
+{
+    g_autoptr(virCommand) cmd = NULL;
+    g_autofree char *ttyinfo = NULL;
+    g_autofree char *inheritfd = NULL;
+    g_autofree char *tty_info_path = NULL;
+    g_autofree char *checkpointfd = NULL;
+    g_autofree char *checkpointdir = NULL;
+    g_autofree char *rootfs_mount = NULL;
+    g_autofree gid_t *groups = NULL;
+    int ret;
+    int ngroups;
+
+    cmd = virCommandNew(CRIU);
+    virCommandAddArg(cmd, "restore");
+
+    checkpointfd = g_strdup_printf("/proc/self/fd/%d", restorefd);
+
+    if (virFileResolveLink(checkpointfd, &checkpointdir) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Failed to readlink checkpoint dir path"));
+        return -1;
+    }
+
+    /* CRIU needs the container's root bind mounted so that it is the root of
+     * some mount.
+     */
+    rootfs_mount = g_strdup_printf("%s/save/%s", LXC_STATE_DIR, def->name);
+
+    virCommandAddArgList(cmd, "--images-dir", checkpointdir, NULL);
+
+    virCommandAddArgList(cmd, "--log-file", "restore.log", NULL);
+
+    virCommandAddArgList(cmd, "--pidfile", "restore.pid", NULL);
+
+    virCommandAddArgList(cmd, "-vvvv", NULL);
+    virCommandAddArgList(cmd, "--tcp-established", "--file-locks",
+                         "--link-remap", "--force-irmap", NULL);
+
+    virCommandAddArgList(cmd, "--enable-external-sharing",
+                         "--enable-external-masters", NULL);
+
+    virCommandAddArgList(cmd, "--ext-mount-map", "auto", NULL);
+
+    virCommandAddArgList(cmd, "--enable-fs", "hugetlbfs",
+                         "--enable-fs", "tracefs", NULL);
+
+    virCommandAddArgList(cmd, "--ext-mount-map", "fuse:/proc/meminfo", NULL);
+
+    virCommandAddArgList(cmd, "--ext-mount-map", "console:/dev/console", NULL);
+    virCommandAddArgList(cmd, "--ext-mount-map", "tty1:/dev/tty1", NULL);
+
+    virCommandAddArgList(cmd, "--restore-detached", "--restore-sibling", NULL);
+
+    /* Restore external tty that was saved in tty.info file
+     */
+    tty_info_path = g_strdup_printf("%s/tty.info", checkpointdir);
+
+    if (virFileReadAll(tty_info_path, 1024, &ttyinfo) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Failed to read tty info from %s"), tty_info_path);
+        return -1;
+    }
+
+    inheritfd = g_strdup_printf("fd[%d]:%s", ttyfd, ttyinfo);
+
+    virCommandAddArgList(cmd, "--inherit-fd", inheritfd, NULL);
+
+    /* Change the root filesystem because we run in mount namespace.
+     */
+    virCommandAddArgList(cmd, "--root", rootfs_mount, NULL);
+
+    if ((ngroups = virGetGroupList(virCommandGetUID(cmd), virCommandGetGID(cmd),
+                                   &groups)) < 0)
+        return -1;
+
+    VIR_DEBUG("Executing init binary");
+    /* this function will only return if an error occurred */
+    ret = virCommandExec(cmd, groups, ngroups);
+
+    if (ret != 0) {
+        VIR_DEBUG("Tearing down container");
+        fprintf(stderr,
+                _("Failure in libvirt_lxc startup: %s\n"),
+                virGetLastErrorMessage());
+    }
+
+    return ret;
+}
+#else /* !WITH_CRIU */
+int lxcCriuCompress(const char *checkpointdir G_GNUC_UNUSED,
+                    const char *compressionType G_GNUC_UNUSED)
+{
+    virReportUnsupportedError();
+    return -1;
+}
+
+int lxcCriuDecompress(const char *checkpointdir G_GNUC_UNUSED,
+                      const char *compressionType G_GNUC_UNUSED)
+{
+    virReportUnsupportedError();
+    return -1;
+}
+
+int lxcCriuDump(virDomainObjPtr vm G_GNUC_UNUSED,
+                const char *checkpointdir G_GNUC_UNUSED)
+{
+    virReportUnsupportedError();
+    return -1;
+}
+
+int lxcCriuRestore(virDomainDefPtr def G_GNUC_UNUSED,
+                   int restorefd G_GNUC_UNUSED,
+                   int ttyfd G_GNUC_UNUSED)
+{
+    virReportUnsupportedError();
+    return -1;
+}
+#endif /* WITH_CRIU */
diff --git a/src/lxc/lxc_criu.h b/src/lxc/lxc_criu.h
new file mode 100644
index 0000000000..7dfd78aa24
--- /dev/null
+++ b/src/lxc/lxc_criu.h
@@ -0,0 +1,53 @@
+/*
+ * lxc_criu.h: CRIU C API methods wrapper
+ *
+ * Copyright (c) 2021 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef LXC_CRIU_H
+# define LXC_CRIU_H
+
+# include "virobject.h"
+# include "domain_conf.h"
+
+/* Magic string and format version stored in the save file header */
+# define LXC_SAVE_MAGIC "LXCCriuSaveMagic"
+# define LXC_SAVE_VERSION 2
+
+typedef struct _virLXCSaveHeader virLXCSaveHeader;
+typedef virLXCSaveHeader *virLXCSaveHeaderPtr;
+/* Metadata header of the file the container state is saved to */
+struct _virLXCSaveHeader {
+    char magic[sizeof(LXC_SAVE_MAGIC)-1]; /* LXC_SAVE_MAGIC without the NUL */
+    uint32_t version;
+    uint32_t xmlLen;
+    uint32_t compressed;
+    uint32_t unused[9];
+};
+
+int lxcCriuCompress(const char *checkpointdir,
+                    const char *compressionType);
+
+int lxcCriuDecompress(const char *checkpointdir,
+                      const char *compressionType);
+
+int lxcCriuDump(virDomainObjPtr vm,
+                const char *checkpointdir);
+
+int lxcCriuRestore(virDomainDefPtr def,
+                   int restorefd, int ttyfd);
+#endif /* LXC_CRIU_H */
diff --git a/src/lxc/meson.build b/src/lxc/meson.build
index ad5c659dba..1a8524aab3 100644
--- a/src/lxc/meson.build
+++ b/src/lxc/meson.build
@@ -9,6 +9,7 @@ lxc_driver_sources = [
   'lxc_monitor.c',
   'lxc_native.c',
   'lxc_process.c',
+  'lxc_criu.c',
 ]
 
 lxc_monitor_protocol = files('lxc_monitor_protocol.x')
@@ -61,6 +62,7 @@ lxc_controller_sources = files(
   'lxc_domain.c',
   'lxc_fuse.c',
   'lxc_controller.c',
+  'lxc_criu.c',
 )
 
 lxc_controller_generated = custom_target(
-- 
2.27.0