lxc / docker containers give the option to inherit namespaces. For example, lxc-start has the option [ --share-[net|ipc|uts] name|pid ], where --share-net name|pid means: inherit a network namespace from a named container or a pid. This patch tries to add a similar option to libvirt lxc. To inherit a namespace from the named container c2, add this to the XML: <lxc:namespace> <sharenet type='name' value='c2'/> </lxc:namespace> To inherit a namespace from a pid, add this to the XML: <lxc:namespace> <sharenet type='pid' value='10245'/> </lxc:namespace> And to inherit a namespace from a netns, add this to the XML: <lxc:namespace> <sharenet type='netns' value='red'/> </lxc:namespace> Similar options exist for ipc/uts: <shareipc /> , <shareuts /> The reason the lxc XML namespace is added is that this feature is very specific to lxc. Therefore I wanted to keep it separated from the actual libvirt domain XML. So the final virsh XML file would look like <domain type='lxc' xmlns:lxc='http://libvirt.org/schemas/domain/lxc/1.0'> <name>cn-03</name> <memory>327680</memory> <os> <type>exe</type> <init>/sbin/init</init> </os> <lxc:namespace> <sharenet type='netns' value='red'/> </lxc:namespace> <vcpu>1</vcpu> <clock offset='utc'/> <on_poweroff>destroy</on_poweroff> <on_reboot>restart</on_reboot> <on_crash>destroy</on_crash> <devices> <emulator>/usr/lib/libvirt/libvirt_lxc</emulator> <filesystem type='mount'> <source dir='/var/lib/lxc/u1/rootfs'/> <target dir='/'/> </filesystem> <console type='pty'/> </devices> </domain> -imran --- src/Makefile.am | 5 +- src/lxc/lxc_conf.c | 2 +- src/lxc/lxc_conf.h | 23 +++++ src/lxc/lxc_container.c | 191 ++++++++++++++++++++++++++++++++++-- src/lxc/lxc_domain.c | 254 +++++++++++++++++++++++++++++++++++++++++++++++- src/lxc/lxc_domain.h | 1 + 6 files changed, 463 insertions(+), 13 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 579421d..1a78fde 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1293,7 +1293,8 @@ libvirt_driver_lxc_impl_la_CFLAGS = \ 
-I$(srcdir)/access \ -I$(srcdir)/conf \ $(AM_CFLAGS) -libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) $(FUSE_LIBS) +libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) $(LIBXML_LIBS) $(FUSE_LIBS) +libvirt_driver_lxc_impl_la_LDFLAGS = libvirt-lxc.la if WITH_BLKID libvirt_driver_lxc_impl_la_CFLAGS += $(BLKID_CFLAGS) libvirt_driver_lxc_impl_la_LIBADD += $(BLKID_LIBS) @@ -2652,6 +2653,8 @@ libvirt_lxc_LDADD = \ libvirt-net-rpc.la \ libvirt_security_manager.la \ libvirt_conf.la \ + libvirt.la \ + libvirt-lxc.la \ libvirt_util.la \ ../gnulib/lib/libgnu.la if WITH_DTRACE_PROBES diff --git a/src/lxc/lxc_conf.c b/src/lxc/lxc_conf.c index c393cb5..96a0f47 100644 --- a/src/lxc/lxc_conf.c +++ b/src/lxc/lxc_conf.c @@ -213,7 +213,7 @@ lxcDomainXMLConfInit(void) { return virDomainXMLOptionNew(&virLXCDriverDomainDefParserConfig, &virLXCDriverPrivateDataCallbacks, - NULL); + &virLXCDriverDomainXMLNamespace); } diff --git a/src/lxc/lxc_conf.h b/src/lxc/lxc_conf.h index 8340b1f..59002e5 100644 --- a/src/lxc/lxc_conf.h +++ b/src/lxc/lxc_conf.h @@ -67,6 +67,29 @@ struct _virLXCDriverConfig { bool securityRequireConfined; }; + +typedef enum { + VIR_DOMAIN_NAMESPACE_SHARENET = 0, + VIR_DOMAIN_NAMESPACE_SHAREIPC, + VIR_DOMAIN_NAMESPACE_SHAREUTS, + VIR_DOMAIN_NAMESPACE_LAST, +} virDomainNamespace; + +struct ns_info { + const char *proc_name; + int clone_flag; +}; + +extern const struct ns_info ns_info[VIR_DOMAIN_NAMESPACE_LAST]; + +typedef struct _lxcDomainDef lxcDomainDef; +typedef lxcDomainDef *lxcDomainDefPtr; +struct _lxcDomainDef { + int ns_inherit_fd[VIR_DOMAIN_NAMESPACE_LAST]; + char *ns_type[VIR_DOMAIN_NAMESPACE_LAST]; + char *ns_val[VIR_DOMAIN_NAMESPACE_LAST]; +}; + struct _virLXCDriver { virMutex lock; diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 9a9ae5c..a9a7ba0 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -25,8 +25,8 @@ */ #include <config.h> - #include <fcntl.h> +#include <sched.h> #include <limits.h> 
#include <stdlib.h> #include <stdio.h> @@ -38,7 +38,6 @@ #include <mntent.h> #include <sys/reboot.h> #include <linux/reboot.h> - /* Yes, we want linux private one, for _syscall2() macro */ #include <linux/unistd.h> @@ -99,6 +98,50 @@ VIR_LOG_INIT("lxc.lxc_container"); typedef char lxc_message_t; #define LXC_CONTINUE_MSG 'c' +#ifdef __linux__ +/* + * Workaround older glibc. While kernel may support the setns + * syscall, the glibc wrapper might not exist. If that's the + * case, use our own. + */ +# ifndef __NR_setns +# if defined(__x86_64__) +# define __NR_setns 308 +# elif defined(__i386__) +# define __NR_setns 346 +# elif defined(__arm__) +# define __NR_setns 375 +# elif defined(__aarch64__) +# define __NR_setns 375 +# elif defined(__powerpc__) +# define __NR_setns 350 +# elif defined(__s390__) +# define __NR_setns 339 +# endif +# endif + +# ifndef HAVE_SETNS +# if defined(__NR_setns) +# include <sys/syscall.h> + +static inline int setns(int fd, int nstype) +{ + return syscall(__NR_setns, fd, nstype); +} +# else /* !__NR_setns */ +# error Please determine the syscall number for setns on your architecture +# endif +# endif +#else /* !__linux__ */ +static inline int setns(int fd ATTRIBUTE_UNUSED, int nstype ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Namespaces are not supported on this platform.")); + return -1; +} +#endif + + typedef struct __lxc_child_argv lxc_child_argv_t; struct __lxc_child_argv { virDomainDefPtr config; @@ -2233,7 +2276,6 @@ static int lxcContainerChild(void *data) vmDef->os.init); goto cleanup; } - /* rename and enable interfaces */ if (lxcContainerRenameAndEnableInterfaces(vmDef, argv->nveths, @@ -2321,6 +2363,99 @@ virArch lxcContainerGetAlt32bitArch(virArch arch) return VIR_ARCH_NONE; } +/* Used only for containers,same as the one defined in + * domain_conf.c. 
But used locally + */ +static const struct ns_info ns_info_local[VIR_DOMAIN_NAMESPACE_LAST] = { + [VIR_DOMAIN_NAMESPACE_SHARENET] = {"net", CLONE_NEWNET}, + [VIR_DOMAIN_NAMESPACE_SHAREIPC] = {"ipc", CLONE_NEWIPC}, + [VIR_DOMAIN_NAMESPACE_SHAREUTS] = {"uts", CLONE_NEWUTS} +}; + + +static void close_ns(int ns_fd[VIR_DOMAIN_NAMESPACE_LAST]) +{ + int i; + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + if (ns_fd[i] > -1) { + if (VIR_CLOSE(ns_fd[i]) < 0) + virReportSystemError(errno, "%s", _("failed to close file")); + ns_fd[i] = -1; + } + } +} + + +/** + * lxcPreserve_ns: + * @ns_fd: array to store current namespace + * @clone_flags: namespaces that need to be preserved + */ +static int lxcPreserve_ns(int ns_fd[VIR_DOMAIN_NAMESPACE_LAST], int clone_flags) +{ + int i, saved_errno; + char *path = NULL; + + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) + ns_fd[i] = -1; + + if (access("/proc/self/ns", X_OK)) { + virReportSystemError(errno, "%s", + _("Kernel does not support attach; preserve_ns ignored")); + return 0; + } + + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + if ((clone_flags & ns_info_local[i].clone_flag) == 0) + continue; + if (virAsprintf(&path, "/proc/self/ns/%s", + ns_info_local[i].proc_name) < 0) + goto error; + ns_fd[i] = open(path, O_RDONLY | O_CLOEXEC); + if (ns_fd[i] < 0) + goto error; + } + VIR_FREE(path); + return 0; + error: + saved_errno = errno; + close_ns(ns_fd); + errno = saved_errno; + VIR_FREE(path); + virReportSystemError(errno, _("failed to open '%s'"), path); + return -1; +} + +/** + * lxcAttach_ns: + * @ns_fd: array of namespaces to attach + */ +static int lxcAttach_ns(const int ns_fd[VIR_DOMAIN_NAMESPACE_LAST]) +{ + int i; + + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + if (ns_fd[i] < 0) + continue; + VIR_DEBUG("Setting into namespace\n"); + + /* We get EINVAL if new NS is same as the current + * NS, or if the fd namespace doesn't match the + * type passed to setns()'s second param. 
Since we + * pass 0, we know the EINVAL is harmless + */ + if (setns(ns_fd[i], 0) < 0 && + errno != EINVAL) + goto error; + } + return 0; + + error: + virReportSystemError(errno, _("failed to set namespace '%s'") + , ns_info_local[i].proc_name); + return -1; +} + /** * lxcContainerStart: @@ -2346,9 +2481,12 @@ int lxcContainerStart(virDomainDefPtr def, char **ttyPaths) { pid_t pid; - int cflags; + int cflags, i; int stacksize = getpagesize() * 4; char *stack, *stacktop; + int saved_ns_fd[VIR_DOMAIN_NAMESPACE_LAST]; + int preserve_mask = 0; + lxcDomainDefPtr lxcDef; lxc_child_argv_t args = { .config = def, .securityDriver = securityDriver, @@ -2368,7 +2506,14 @@ int lxcContainerStart(virDomainDefPtr def, stacktop = stack + stacksize; - cflags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC|SIGCHLD; + lxcDef = def->namespaceData; + for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) + if (lxcDef && lxcDef->ns_inherit_fd[i] != -1) + preserve_mask |= ns_info_local[i].clone_flag; + + + + cflags = CLONE_NEWPID|CLONE_NEWNS|SIGCHLD; if (userns_required(def)) { if (userns_supported()) { @@ -2381,10 +2526,36 @@ int lxcContainerStart(virDomainDefPtr def, return -1; } } + if (!lxcDef || (lxcDef && lxcDef->ns_inherit_fd[VIR_DOMAIN_NAMESPACE_SHARENET] == -1)) { + if (lxcNeedNetworkNamespace(def)) { + VIR_DEBUG("Enable network namespaces"); + cflags |= CLONE_NEWNET; + } + } else { + VIR_DEBUG("Inheriting a net namespace"); + } - if (lxcNeedNetworkNamespace(def)) { - VIR_DEBUG("Enable network namespaces"); - cflags |= CLONE_NEWNET; + if (!lxcDef || (lxcDef && lxcDef->ns_inherit_fd[VIR_DOMAIN_NAMESPACE_SHAREIPC] == -1)) { + cflags |= CLONE_NEWIPC; + } else { + VIR_DEBUG("Inheriting an IPC namespace"); + } + + if (!lxcDef || (lxcDef && lxcDef->ns_inherit_fd[VIR_DOMAIN_NAMESPACE_SHAREUTS] == -1)) { + cflags |= CLONE_NEWUTS; + } else { + VIR_DEBUG("Inheriting a UTS namespace"); + } + + if (lxcDef && lxcPreserve_ns(saved_ns_fd, preserve_mask) < 0) { + 
virReportError(VIR_ERR_SYSTEM_ERROR, "%s", + _("failed to preserve the namespace")); + return -1; + } + if (lxcDef && lxcAttach_ns(lxcDef->ns_inherit_fd) < 0) { + virReportError(VIR_ERR_SYSTEM_ERROR, "%s", + _("failed to attach the namespace")); + return -1; } VIR_DEBUG("Cloning container init process"); @@ -2397,6 +2568,10 @@ int lxcContainerStart(virDomainDefPtr def, _("Failed to run clone container")); return -1; } + if (lxcDef && lxcAttach_ns(saved_ns_fd)) { + virReportError(VIR_ERR_SYSTEM_ERROR, "%s", + _("failed to restore saved namespaces")); + } return pid; } diff --git a/src/lxc/lxc_domain.c b/src/lxc/lxc_domain.c index c2180cb..6e4a19a 100644 --- a/src/lxc/lxc_domain.c +++ b/src/lxc/lxc_domain.c @@ -20,14 +20,18 @@ */ #include <config.h> - #include "lxc_domain.h" - #include "viralloc.h" #include "virlog.h" #include "virerror.h" +#include <fcntl.h> +#include <libxml/xpathInternals.h> +#include "virstring.h" +#include "virutil.h" +#include "virfile.h" #define VIR_FROM_THIS VIR_FROM_LXC +#define LXC_NAMESPACE_HREF "http://libvirt.org/schemas/domain/lxc/1.0" VIR_LOG_INIT("lxc.lxc_domain"); @@ -41,6 +45,251 @@ static void *virLXCDomainObjPrivateAlloc(void) return priv; } + +static int open_ns(const char *nnsname_pid, const char *ns_proc_name) +{ + int fd = -1; + virDomainPtr dom = NULL; + virConnectPtr conn = NULL; + pid_t pid; + int nfdlist; + int *fdlist; + char *path = NULL; + char *eptr; + pid = strtol(nnsname_pid, &eptr, 10); + if (*eptr != '\0' || pid < 1) { + /* check if the domain is running, then set the namespaces + * to that container + */ + size_t i = 0; + const char *ns[] = { "user", "ipc", "uts", "net", "pid", "mnt" }; + conn = virConnectOpen("lxc:///"); + if (!conn) + goto cleanup; + dom = virDomainLookupByName(conn, nnsname_pid); + if (!dom) + goto cleanup; + if ((nfdlist = virDomainLxcOpenNamespace(dom, &fdlist, 0)) < 0) + goto cleanup; + /* Internally above function calls virProcessGetNamespaces + * function which opens ns + * in the order { 
"user", "ipc", "uts", "net", "pid", "mnt" } + */ + for (i = 0; i < ARRAY_CARDINALITY(ns); i++) { + if (STREQ(ns[i], ns_proc_name)) { + fd = fdlist[i]; + break; + } + } + if (nfdlist > 0) + VIR_FREE(fdlist); + } else { + if (virAsprintf(&path, "/proc/%d/ns/%s", pid, ns_proc_name) < 0) + goto cleanup; + fd = open(path, O_RDONLY); + } +cleanup: + if (dom) + virDomainFree(dom); + VIR_FREE(path); + (fd < 0)? VIR_ERROR( + _("failed to open ns %s"), nnsname_pid): + VIR_DEBUG("OPENED NAMESPACE : fd %d\n", fd); + return fd; +} + + +/* Used only for containers */ +const struct ns_info ns_info[VIR_DOMAIN_NAMESPACE_LAST] = { + [VIR_DOMAIN_NAMESPACE_SHARENET] = {"net", CLONE_NEWNET}, + [VIR_DOMAIN_NAMESPACE_SHAREIPC] = {"ipc", CLONE_NEWIPC}, + [VIR_DOMAIN_NAMESPACE_SHAREUTS] = {"uts", CLONE_NEWUTS} +}; + +VIR_ENUM_DECL(virDomainNamespace) +VIR_ENUM_IMPL(virDomainNamespace, VIR_DOMAIN_NAMESPACE_LAST, + N_("sharenet"), + N_("shareipc"), + N_("shareuts")) + +static void +lxcDomainDefNamespaceFree(void *nsdata) +{ + int j; + lxcDomainDefPtr lxcDef = nsdata; + for (j = 0; j < VIR_DOMAIN_NAMESPACE_LAST; j++) { + if (lxcDef->ns_inherit_fd[j] > 0) { + VIR_FREE(lxcDef->ns_type); + VIR_FREE(lxcDef->ns_val); +#if 0 + if (VIR_CLOSE(lxcDef->ns_inherit_fd[j]) < 0) + virReportSystemError(errno, "%s", _("failed to close file")); +#endif + } + } + VIR_FREE(nsdata); +} + +static int +lxcDomainDefNamespaceParse(xmlDocPtr xml ATTRIBUTE_UNUSED, + xmlNodePtr root ATTRIBUTE_UNUSED, + xmlXPathContextPtr ctxt, + void **data) +{ + lxcDomainDefPtr lxcDef = NULL; + xmlNodePtr *nodes = NULL; + bool uses_lxc_ns = false; + xmlNodePtr node; + int feature; + int n; + char *tmp = NULL; + size_t i; + pid_t fd = -1; + + if (xmlXPathRegisterNs(ctxt, BAD_CAST "lxc", BAD_CAST LXC_NAMESPACE_HREF) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to register xml namespace '%s'"), + LXC_NAMESPACE_HREF); + return -1; + } + + if (VIR_ALLOC(lxcDef) < 0) + return -1; + + /* Init ns_herit_fd for namespaces */ + 
for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) { + lxcDef->ns_inherit_fd[i] = -1; + lxcDef->ns_type[i] = NULL; + lxcDef->ns_val[i] = NULL; + } + + node = ctxt->node; + if ((n = virXPathNodeSet("./lxc:namespace/*", ctxt, &nodes)) < 0) + goto error; + uses_lxc_ns |= n > 0; + + for (i = 0; i < n; i++) { + feature = + virDomainNamespaceTypeFromString((const char *) nodes[i]->name); + if (feature < 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unsupported Namespace feature: %s"), + nodes[i]->name); + goto error; + } + + ctxt->node = nodes[i]; + + switch ((virDomainNamespace) feature) { + case VIR_DOMAIN_NAMESPACE_SHARENET: + case VIR_DOMAIN_NAMESPACE_SHAREIPC: + case VIR_DOMAIN_NAMESPACE_SHAREUTS: + { + tmp = virXMLPropString(nodes[i], "type"); + if (tmp == NULL) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("No lxc environment type specified")); + goto error; + } + /* save the tmp so that its needed while writing to xml */ + lxcDef->ns_type[feature] = tmp; + tmp = virXMLPropString(nodes[i], "value"); + if (tmp == NULL) { + virReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("No lxc environment type specified")); + goto error; + } + lxcDef->ns_val[feature] = tmp; + /*netns option is only for VIR_DOMAIN_NAMESPACE_SHARENET*/ + if (STREQ("netns", lxcDef->ns_type[VIR_DOMAIN_NAMESPACE_SHARENET])) { + char *path = NULL; + if (virAsprintf(&path, "/var/run/netns/%s", tmp) < 0) + goto error; + fd = open(path, O_RDONLY); + VIR_FREE(path); + } else { + fd = open_ns(tmp, ns_info[feature].proc_name); + if (fd < 0) { + virReportError(VIR_ERR_XML_ERROR, + _("unable to open %s namespace for " + "namespace feature '%s'"), tmp, + nodes[i]->name); + goto error; + } + } + lxcDef->ns_inherit_fd[feature] = fd; + } + break; + case VIR_DOMAIN_NAMESPACE_LAST: + break; + } + } + VIR_FREE(nodes); + ctxt->node = node; + if (uses_lxc_ns) + *data = lxcDef; + else + VIR_FREE(lxcDef); + return 0; + error: + VIR_FREE(nodes); + lxcDomainDefNamespaceFree(lxcDef); + return -1; +} + + +static 
int +lxcDomainDefNamespaceFormatXML(virBufferPtr buf, + void *nsdata) +{ + lxcDomainDefPtr lxcDef = nsdata; + size_t j; + + if (!lxcDef) + return 0; + + virBufferAddLit(buf, "<lxc:namespace>\n"); + virBufferAdjustIndent(buf, 2); + + for (j = 0; j < VIR_DOMAIN_NAMESPACE_LAST; j++) { + switch ((virDomainNamespace) j) { + case VIR_DOMAIN_NAMESPACE_SHAREIPC: + case VIR_DOMAIN_NAMESPACE_SHAREUTS: + case VIR_DOMAIN_NAMESPACE_SHARENET: + { + if (lxcDef->ns_inherit_fd[j] > 0) { + virBufferAsprintf(buf, "<%s type='%s' value='%s'/>\n", + virDomainNamespaceTypeToString(j), + lxcDef->ns_type[j], + lxcDef->ns_val[j]); + } + } + break; + case VIR_DOMAIN_NAMESPACE_LAST: + break; + } + } + + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</lxc:namespace>\n"); + return 0; +} + +static const char * +lxcDomainDefNamespaceHref(void) +{ + return "xmlns:lxc='" LXC_NAMESPACE_HREF "'"; +} + + +virDomainXMLNamespace virLXCDriverDomainXMLNamespace = { + .parse = lxcDomainDefNamespaceParse, + .free = lxcDomainDefNamespaceFree, + .format = lxcDomainDefNamespaceFormatXML, + .href = lxcDomainDefNamespaceHref, +}; + + static void virLXCDomainObjPrivateFree(void *data) { virLXCDomainObjPrivatePtr priv = data; @@ -73,7 +322,6 @@ static int virLXCDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt, void *data) } else { priv->initpid = thepid; } - return 0; } diff --git a/src/lxc/lxc_domain.h b/src/lxc/lxc_domain.h index 751aece..25df999 100644 --- a/src/lxc/lxc_domain.h +++ b/src/lxc/lxc_domain.h @@ -41,6 +41,7 @@ struct _virLXCDomainObjPrivate { virCgroupPtr cgroup; }; +extern virDomainXMLNamespace virLXCDriverDomainXMLNamespace; extern virDomainXMLPrivateDataCallbacks virLXCDriverPrivateDataCallbacks; extern virDomainDefParserConfig virLXCDriverDomainDefParserConfig; -- 1.9.1 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list