# HG changeset patch
# User Dan Smith <danms@xxxxxxxxxx>
# Date 1213891185 25200
# Node ID cb780a7b3ad591f1a9392d6528218b3aa2c3483d
# Parent  acf369a2543ad52b235ae8541c8ad05670e255bd
[LXC] Add setup/cleanup of container network interfaces

diff -r acf369a2543a -r cb780a7b3ad5 src/lxc_conf.h
--- a/src/lxc_conf.h	Thu Jun 19 08:59:37 2008 -0700
+++ b/src/lxc_conf.h	Thu Jun 19 08:59:45 2008 -0700
@@ -35,6 +35,13 @@
 #define LXC_MAX_XML_LENGTH 16384
 #define LXC_MAX_ERROR_LEN 1024
 #define LXC_DOMAIN_TYPE "lxc"
+/* indexes into the sockpair array of lxc_vm_t */
+#define LXC_PARENT_SOCKET 0
+#define LXC_CONTAINER_SOCKET 1
+
+/* messages between parent and container */
+typedef char lxc_message_t;
+#define LXC_CONTINUE_MSG 'c'
 
 /* types of networks for containers */
 enum lxc_net_type {
@@ -96,6 +103,8 @@
     int parentTty;
     int containerTtyFd;
     char *containerTty;
+
+    int sockpair[2];
 
     lxc_vm_def_t *def;
 
diff -r acf369a2543a -r cb780a7b3ad5 src/lxc_container.c
--- a/src/lxc_container.c	Thu Jun 19 08:59:37 2008 -0700
+++ b/src/lxc_container.c	Thu Jun 19 08:59:45 2008 -0700
@@ -36,6 +36,7 @@
 #include "lxc_conf.h"
 #include "util.h"
 #include "memory.h"
+#include "veth.h"
 
 #define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
 #define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg)
@@ -159,6 +160,82 @@
 }
 
 /**
+ * lxcWaitForContinue:
+ * @vm: Pointer to vm structure
+ *
+ * Called in the container process.  Blocks until the parent process sends
+ * the continue message on the socket pair stored in the vm structure, which
+ * it does once it has completed the post clone container setup.  Both
+ * descriptors of the socket pair are closed in this process before
+ * returning, whether or not the message arrived.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcWaitForContinue(lxc_vm_t *vm)
+{
+    int rc = -1;
+    lxc_message_t msg;
+    int readLen = 0;
+
+    /* Drop our copy of the parent's end first.  While it is open here the
+     * read below cannot see end-of-file if the parent closes the socket
+     * pair without sending anything. */
+    close(vm->sockpair[LXC_PARENT_SOCKET]);
+    vm->sockpair[LXC_PARENT_SOCKET] = -1;
+
+    readLen = saferead(vm->sockpair[LXC_CONTAINER_SOCKET], &msg, sizeof(msg));
+    if (readLen != sizeof(msg)) {
+        /* errno only applies if the read itself failed (negative result) */
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("Failed to read the container continue message: %s"),
+                 readLen < 0 ? strerror(errno) : _("unexpected end of file"));
+        goto cleanup;
+    }
+
+    DEBUG0("Received container continue message");
+
+    rc = 0;
+
+cleanup:
+    close(vm->sockpair[LXC_CONTAINER_SOCKET]);
+    vm->sockpair[LXC_CONTAINER_SOCKET] = -1;
+
+    return rc;
+}
+
+#ifdef HAVE_NETNS
+/**
+ * lxcEnableInterfaces:
+ * @vm: Pointer to vm structure
+ *
+ * Brings up the container end of every veth pair defined for this
+ * container and then the loopback device.  Stops at the first failure.
+ *
+ * Returns 0 on success or the non-zero vethInterfaceUpOrDown() result
+ * of the interface that could not be enabled
+ */
+static int lxcEnableInterfaces(lxc_vm_t *vm)
+{
+    int rc = -1;
+    lxc_net_def_t *net;
+
+    for (net = vm->def->nets; net; net = net->next) {
+        DEBUG("Enabling %s", net->containerVeth);
+        rc = vethInterfaceUpOrDown(net->containerVeth, 1);
+        if (0 != rc) {
+            goto error_out;
+        }
+    }
+
+    /* enable lo device */
+    rc = vethInterfaceUpOrDown("lo", 1);
+
+error_out:
+    return rc;
+}
+#endif /* HAVE_NETNS */
+
+/**
  * lxcChild:
  * @argv: Pointer to container arguments
  *
@@ -210,6 +287,18 @@
         goto cleanup;
     }
 
+    /* Wait for interface devices to show up */
+    if (0 != (rc = lxcWaitForContinue(vm))) {
+        goto cleanup;
+    }
+
+#ifdef HAVE_NETNS
+    /* enable interfaces */
+    if (0 != (rc = lxcEnableInterfaces(vm))) {
+        goto cleanup;
+    }
+#endif
+
     rc = lxcExecWithTty(vm);
     /* this function will only return if an error occured */
 
diff -r acf369a2543a -r cb780a7b3ad5 src/lxc_driver.c
--- a/src/lxc_driver.c	Thu Jun 19 08:59:37 2008 -0700
+++ b/src/lxc_driver.c	Thu Jun 19 08:59:45 2008 -0700
@@ -44,6 +44,9 @@
 #include "memory.h"
 #include "util.h"
 #include "memory.h"
+#include "bridge.h"
+#include "qemu_conf.h"
+#include "veth.h"
 
 /* debug macros */
 #define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
@@ -66,6 +69,9 @@
 #ifndef CLONE_NEWIPC
 #define CLONE_NEWIPC  0x08000000
 #endif
+#ifndef CLONE_NEWNET
+#define CLONE_NEWNET  0x40000000 /* New network namespace */
+#endif
 
 static int lxcStartup(void);
 static int lxcShutdown(void);
@@ -81,6 +87,9 @@
 {
     int rc = 0;
     int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|
+#ifdef HAVE_NETNS
+        CLONE_NEWNET|
+#endif
         CLONE_NEWIPC|SIGCHLD;
     int cpid;
     char *childStack;
@@ -237,6 +246,9 @@
 static int lxcNumDomains(virConnectPtr conn)
 {
     lxc_driver_t *driver = (lxc_driver_t *)conn->privateData;
+
+    DEBUG("driver: %p network: %p", conn->privateData, conn->networkPrivateData);
+
     return driver->nactivevms;
 }
 
@@ -384,6 +396,254 @@
     return lxcGenerateXML(dom->conn, driver, vm, vm->def);
 }
 
+#ifdef HAVE_NETNS
+/**
+ * lxcCopyVethName:
+ * @dst: buffer receiving the name
+ * @dstLen: size of @dst in bytes
+ * @src: configured device name, or NULL if none was configured
+ *
+ * Copies @src into @dst, leaving @dst empty when @src is NULL.
+ *
+ * Returns 0 on success or -1 if @src does not fit into @dst
+ */
+static int lxcCopyVethName(char *dst, size_t dstLen, const char *src)
+{
+    dst[0] = '\0';
+    if (NULL == src) {
+        return 0;
+    }
+    if (strlen(src) >= dstLen) {
+        return -1;
+    }
+    strcpy(dst, src);
+
+    return 0;
+}
+
+/**
+ * lxcSetupInterfaces:
+ * @conn: pointer to connection
+ * @vm: pointer to virtual machine structure
+ *
+ * Sets up the container interfaces by creating the veth device pairs and
+ * attaching the parent end to the appropriate bridge.  The container end
+ * will be moved into the container namespace later after clone has been
+ * called.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcSetupInterfaces(virConnectPtr conn,
+                              lxc_vm_t *vm)
+{
+    int rc = -1;
+    int err;
+    /* NOTE(review): assumes the network driver's private data is always a
+     * struct qemud_driver - confirm this holds for every connection */
+    struct qemud_driver *networkDriver =
+        (struct qemud_driver *)(conn->networkPrivateData);
+    lxc_net_def_t *net;
+    const char *bridge;
+    char *networkBridge = NULL;
+    char parentVeth[PATH_MAX] = "";
+    char containerVeth[PATH_MAX] = "";
+
+    for (net = vm->def->nets; net; net = net->next) {
+        if (NULL == networkDriver) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("network driver is not available"));
+            goto error_exit;
+        }
+
+        if (LXC_NET_NETWORK == net->type) {
+            virNetworkPtr network = virNetworkLookupByName(conn, net->txName);
+            if (!network) {
+                goto error_exit;
+            }
+
+            /* the returned name is allocated; it is released below */
+            networkBridge = virNetworkGetBridgeName(network);
+            bridge = networkBridge;
+
+            virNetworkFree(network);
+        } else {
+            bridge = net->txName;
+        }
+
+        /* check before logging: a NULL must not be passed for %s */
+        if (NULL == bridge) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to get bridge for interface"));
+            goto error_exit;
+        }
+        DEBUG("bridge: %s", bridge);
+
+        /* start from this interface's own names (or empty ones) so that
+         * nothing is carried over from the previous interface */
+        if (0 != lxcCopyVethName(parentVeth, sizeof(parentVeth),
+                                 net->parentVeth) ||
+            0 != lxcCopyVethName(containerVeth, sizeof(containerVeth),
+                                 net->containerVeth)) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("veth device name is too long"));
+            goto error_exit;
+        }
+
+        DEBUG0("calling vethCreate()");
+        DEBUG("parentVeth: %s, containerVeth: %s", parentVeth, containerVeth);
+        if (0 != (err = vethCreate(parentVeth, PATH_MAX,
+                                   containerVeth, PATH_MAX))) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to create veth device pair: %d"), err);
+            goto error_exit;
+        }
+
+        /* remember the names that were used when none were configured */
+        if (NULL == net->parentVeth) {
+            net->parentVeth = strdup(parentVeth);
+        }
+        if (NULL == net->containerVeth) {
+            net->containerVeth = strdup(containerVeth);
+        }
+        if (NULL == net->parentVeth || NULL == net->containerVeth) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to allocate veth device name"));
+            goto error_exit;
+        }
+
+        if (!(networkDriver->brctl) &&
+            (err = brInit(&(networkDriver->brctl)))) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("cannot initialize bridge support: %s"),
+                     strerror(err));
+            goto error_exit;
+        }
+
+        if (0 != (err = brAddInterface(networkDriver->brctl, bridge,
+                                       parentVeth))) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to add %s device to %s: %s"),
+                     parentVeth,
+                     bridge,
+                     strerror(err));
+            goto error_exit;
+        }
+
+        if (0 != (err = vethInterfaceUpOrDown(parentVeth, 1))) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to enable parent ns veth device: %d"), err);
+            goto error_exit;
+        }
+
+        VIR_FREE(networkBridge);
+    }
+
+    rc = 0;
+
+error_exit:
+    VIR_FREE(networkBridge);
+    return rc;
+}
+
+/**
+ * lxcMoveInterfacesToNetNs:
+ * @conn: pointer to connection
+ * @vm: pointer to virtual machine structure
+ *
+ * Moves the container end of each veth pair into the network namespace of
+ * the container process identified by vm->def->id.  Stops at the first
+ * interface that cannot be moved.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcMoveInterfacesToNetNs(virConnectPtr conn,
+                                    lxc_vm_t *vm)
+{
+    int rc = -1;
+    lxc_net_def_t *net;
+
+    for (net = vm->def->nets; net; net = net->next) {
+        if (0 != moveInterfaceToNetNs(net->containerVeth, vm->def->id)) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to move interface %s to ns %d"),
+                     net->containerVeth, vm->def->id);
+            goto error_exit;
+        }
+    }
+
+    rc = 0;
+
+error_exit:
+    return rc;
+}
+
+/**
+ * lxcCleanupInterfaces:
+ * @vm: pointer to virtual machine structure
+ *
+ * Cleans up the container interfaces by deleting the veth device pairs.
+ * Every interface is attempted even if an earlier one fails; each failure
+ * is reported through lxcError().
+ *
+ * Returns 0 on success or -1 if any device could not be deleted
+ */
+static int lxcCleanupInterfaces(lxc_vm_t *vm)
+{
+    int rc = 0;
+    lxc_net_def_t *net;
+
+    for (net = vm->def->nets; net; net = net->next) {
+        /* no name was recorded, so there is nothing to delete */
+        if (NULL == net->parentVeth) {
+            continue;
+        }
+
+        if (0 != vethDelete(net->parentVeth)) {
+            lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to delete veth: %s"), net->parentVeth);
+            /* will continue to try to cleanup any other interfaces */
+            rc = -1;
+        }
+    }
+
+    return rc;
+}
+#endif /* HAVE_NETNS */
+
+/**
+ * lxcSendContainerContinue:
+ * @vm: pointer to virtual machine structure
+ *
+ * Sends the continue message via the socket pair stored in the vm
+ * structure.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcSendContainerContinue(lxc_vm_t *vm)
+{
+    int rc = -1;
+    lxc_message_t msg = LXC_CONTINUE_MSG;
+    int writeCount = 0;
+
+    if (NULL == vm) {
+        goto error_out;
+    }
+
+    writeCount = safewrite(vm->sockpair[LXC_PARENT_SOCKET], &msg,
+                           sizeof(msg));
+    if (writeCount != sizeof(msg)) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("unable to send container continue message: %s"),
+                 strerror(errno));
+        goto error_out;
+    }
+
+    rc = 0;
+
+error_out:
+    return rc;
+}
+
 /**
  * lxcStartContainer:
  * @conn: pointer to connection
@@ -411,7 +671,11 @@
     }
     stacktop = stack + stacksize;
 
-    flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD;
+    flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|
+#ifdef HAVE_NETNS
+        CLONE_NEWNET|
+#endif
+        CLONE_NEWIPC|SIGCHLD;
 
     vm->def->id = clone(lxcChild, stacktop, flags, (void *)vm);
 
@@ -809,7 +1073,40 @@
     close(vm->parentTty);
     close(vm->containerTtyFd);
 
+#ifdef HAVE_NETNS
+    if (0 != (rc = lxcSetupInterfaces(conn, vm))) {
+        goto cleanup;
+    }
+#endif /* HAVE_NETNS */
+
+    /* create a socket pair to send continue message to the container once */
+    /* we've completed the post clone configuration */
+    if (0 != socketpair(PF_UNIX, SOCK_STREAM, 0, vm->sockpair)) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("sockpair failed: %s"), strerror(errno));
+        goto cleanup;
+    }
+
     rc = lxcStartContainer(conn, driver, vm);
+
+#ifdef HAVE_NETNS
+    /* only touch the namespace of a container that actually started */
+    if (0 == rc) {
+        rc = lxcMoveInterfacesToNetNs(conn, vm);
+    }
+#endif
+
+    /* keep the first failure instead of overwriting it with this result */
+    if (0 == rc) {
+        rc = lxcSendContainerContinue(vm);
+    }
+
+    /* On failure no message was sent; closing the parent's descriptors
+     * lets the container's read see end-of-file and give up. */
+    close(vm->sockpair[LXC_PARENT_SOCKET]);
+    vm->sockpair[LXC_PARENT_SOCKET] = -1;
+    close(vm->sockpair[LXC_CONTAINER_SOCKET]);
+    vm->sockpair[LXC_CONTAINER_SOCKET] = -1;
 
     if (rc == 0) {
         vm->state = VIR_DOMAIN_RUNNING;
@@ -948,6 +1245,11 @@
     int waitRc;
     int childStatus = -1;
 
+    /* if this fails, we'll continue.  it will report any errors */
+#ifdef HAVE_NETNS
+    lxcCleanupInterfaces(vm);
+#endif
+
     while (((waitRc = waitpid(vm->def->id, &childStatus, 0)) == -1) &&
            errno == EINTR);
 
--
Libvir-list mailing list
Libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list