Re: [PATCH] Inherit namespace feature

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Thanks Daniel and Michal again for your valuable inputs.  Please check my reply with text <imran> for some of your comments.  And request you to help on those.

BTW:  should i reply with rework in the new patch. or i should reply to this thread itself?  Sorry i am new to the community so yet to get familiar with etiquette.

On Thu, Jul 30, 2015 at 7:36 PM, Daniel P. Berrange <berrange@xxxxxxxxxx> wrote:
On Wed, Jul 01, 2015 at 11:07:07PM +0530, ik.nitk wrote:
> This patch adds feature for lxc containers to inherit namespaces. This is very similar to what
> lxc-tools or docker provides.  Look for "man lxc-start" and you will find that you can pass command args as
> [ --share-[net|ipc|uts] name|pid ]. Or check out docker networking option in which you can give --net=container:NAME_or_ID as an option for sharing namespace.
>
> >From this patch you can add extra libvirt option to share namespace in following way.
>  <lxc:namespace>
>    <lxc:sharenet type='netns' value='red'/>
>    <lxc:shareipc type='pid' value='12345'/>
>    <lxc:shareuts type='name' value='container1'/>
>  </lxc:namespace>
>
>
> ---
>  docs/drvlxc.html.in                   |  18 +++
>  docs/schemas/domaincommon.rng         |  42 ++++++
>  src/Makefile.am                       |   4 +-
>  src/lxc/lxc_conf.c                    |   2 +-
>  src/lxc/lxc_conf.h                    |  15 +++
>  src/lxc/lxc_container.c               | 236 +++++++++++++++++++++++++++++++++-
>  src/lxc/lxc_domain.c                  | 164 ++++++++++++++++++++++-
>  src/lxc/lxc_domain.h                  |   1 +
>  tests/lxcxml2xmldata/lxc-sharenet.xml |  33 +++++
>  tests/lxcxml2xmltest.c                |   1 +
>  10 files changed, 507 insertions(+), 9 deletions(-)
>  create mode 100644 tests/lxcxml2xmldata/lxc-sharenet.xml
>
> diff --git a/docs/drvlxc.html.in b/docs/drvlxc.html.in
> index a094bd9..d14d4c7 100644
> --- a/docs/drvlxc.html.in
> +++ b/docs/drvlxc.html.in
> @@ -590,6 +590,24 @@ Note that allowing capabilities that are normally dropped by default can serious
>  affect the security of the container and the host.
>  </p>
>
> +<h2><a name="share">Inherit namespaces</a></h2>
> +
> +<p>
> +Libvirt allows you to inherit the namespace from container/process just like lxc tools
> +or docker provides to share the network namespace. The following can be used to share
> +required namespaces. If we want to share only one then the other namespaces can be ignored.
> +</p>
> +<pre>
> +&lt;domain type='lxc' xmlns:lxc='http://libvirt.org/schemas/domain/lxc/1.0'&gt;
> +...
> +&lt;lxc:namespace&gt;
> +  &lt;lxc:sharenet type='netns' value='red'/&gt;
> +  &lt;lxc:shareuts type='name' value='container1'/&gt;
> +  &lt;lxc:shareipc type='pid' value='12345'/&gt;
> +&lt;/lxc:namespace&gt;
> +&lt;/domain&gt;
> +</pre>

Could you also document the attributes explicitly, the various 'type'
attribute values and what they expect for the corresponding 'value'
argument. In particular I'm unclear on what the value is when
using type='netns', so its a good idea to be explicit about this.

<imran>:   Netns is generally the name of network namespace present in /var/run/netns/   So this will be familiar for folks who are using docker netns option.  please check http://man7.org/linux/man-pages/man8/ip-netns.8.html

 
> diff --git a/src/Makefile.am b/src/Makefile.am
> index be63e26..ef96a5a 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -1319,7 +1319,7 @@ libvirt_driver_lxc_impl_la_CFLAGS = \
>               -I$(srcdir)/access \
>               -I$(srcdir)/conf \
>               $(AM_CFLAGS)
> -libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) $(FUSE_LIBS)
> +libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) $(LIBXML_LIBS) libvirt-lxc.la $(FUSE_LIBS)

What was the $LIBXML_LIBS addition needed for ?

I can see why you added libvirt-lxc.la but I will suggested
changes later to avoid that.

<imran>:  This required as we are adding new XML options.  

>  if WITH_BLKID
>  libvirt_driver_lxc_impl_la_CFLAGS += $(BLKID_CFLAGS)
>  libvirt_driver_lxc_impl_la_LIBADD += $(BLKID_LIBS)
> @@ -2709,6 +2709,8 @@ libvirt_lxc_LDADD =                     \
>               libvirt-net-rpc.la \
>               libvirt_security_manager.la \
>               libvirt_conf.la \
> +             libvirt.la \
> +             libvirt-lxc.la \

Again I want us to avoid this too

>               libvirt_util.la \
>               ../gnulib/lib/libgnu.la
>  if WITH_DTRACE_PROBES


> diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
> index 11e9514..d8362ab 100644
> --- a/src/lxc/lxc_container.c
> +++ b/src/lxc/lxc_container.c
> @@ -27,6 +27,7 @@
>  #include <config.h>
>
>  #include <fcntl.h>
> +#include <sched.h>
>  #include <limits.h>
>  #include <stdlib.h>
>  #include <stdio.h>
> @@ -38,7 +39,6 @@
>  #include <mntent.h>
>  #include <sys/reboot.h>
>  #include <linux/reboot.h>
> -
>  /* Yes, we want linux private one, for _syscall2() macro */
>  #include <linux/unistd.h>

Try to avoid adding/removing random whitespace in patches. If you
think something warrents a cleanup just send a separate patch

>
> @@ -2321,6 +2321,181 @@ virArch lxcContainerGetAlt32bitArch(virArch arch)
>      return VIR_ARCH_NONE;
>  }
>
> +struct ns_info {
> +        const char *proc_name;
> +        int clone_flag;

We usually use capitalization in struct / type names not
underscores. Also try to always use a prefix to make it
clearer that its libvirt code not some system header.
so eg  lxcNSInfo

<imran>:  will edit and re-send. 

> +}ns_info_local[VIR_DOMAIN_NAMESPACE_LAST] = {


> +    [VIR_DOMAIN_NAMESPACE_SHARENET] = {"net", CLONE_NEWNET},
> +    [VIR_DOMAIN_NAMESPACE_SHAREIPC] = {"ipc", CLONE_NEWIPC},
> +    [VIR_DOMAIN_NAMESPACE_SHAREUTS] = {"uts", CLONE_NEWUTS}
> +};
> +
> +static int lxcOpen_ns(lxcDomainDefPtr lxcDef, int ns_fd[VIR_DOMAIN_NAMESPACE_LAST])

We mostly use capitlization in method names rather than underscores
so eg lxcOpenNS

> +{
> +    int i, n, rc = 0;
> +    virDomainPtr dom = NULL;
> +    virConnectPtr conn = NULL;
> +    pid_t pid;
> +    int nfdlist;
> +    int *fdlist;
> +    char *path = NULL;
> +    char *eptr;
> +    for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++)
> +        ns_fd[i] = -1;
> +
> +    if (STREQ_NULLABLE("netns", lxcDef->ns_type[VIR_DOMAIN_NAMESPACE_SHARENET])) {
> +        if (virAsprintf(&path, "/var/run/netns/%s", lxcDef->ns_val[VIR_DOMAIN_NAMESPACE_SHARENET]) < 0)
> +            return  -1;

Interesting - what is responsible for the /var/run/netns/ entries ? Is that
a standardized convention somewhere.

<imran>:  Yes this is the standard. please check this link
      http://man7.org/linux/man-pages/man8/ip-netns.8.html

 
> +        ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] = open(path, O_RDONLY);
> +        VIR_FREE(path);
> +        if (ns_fd[VIR_DOMAIN_NAMESPACE_SHARENET] < 0) {
> +            virReportSystemError(errno,
> +                      _("failed to open netns %s"), lxcDef->ns_val[VIR_DOMAIN_NAMESPACE_SHARENET]);
> +            return -1;
> +        }
> +    }
> +    for (i = 0; i < VIR_DOMAIN_NAMESPACE_LAST; i++) {
> +        /* If not yet intialized by above: netns*/
> +        if (lxcDef->ns_type[i] && ns_fd[i] == -1) {
> +            pid = strtol(lxcDef->ns_val[i], &eptr, 10);
> +            if (*eptr != '\0' || pid < 1) {
> +                /* check if the domain is running, then set the namespaces
> +                 * to that container
> +                 */
> +                const char *ns[] = { "user", "ipc", "uts", "net", "pid", "mnt" };
> +                conn = virConnectOpen("lxc:///");
> +                if (!conn) {
> +                    virReportError(virGetLastError()->code,
> +                      _("unable to get connect to lxc %s"), lxcDef->ns_val[i]);
> +                    rc = -1;
> +                    goto cleanup;
> +                }
> +                dom = virDomainLookupByName(conn, lxcDef->ns_val[i]);
> +                if (!dom) {
> +                    virReportError(virGetLastError()->code,
> +                                   _("Unable to lookup peer containeri %s"),
> +                                   lxcDef->ns_val[i]);
> +                    rc = -1;
> +                    goto cleanup;
> +                }
> +                if ((nfdlist = virDomainLxcOpenNamespace(dom, &fdlist, 0)) < 0) {
> +                    virReportError(virGetLastError()->code,
> +                                   _("Unable to open %s"), lxcDef->ns_val[i]);
> +                    rc = -1;
> +                    goto cleanup;
> +                }

I really hate the idea of the libvirt_lxc program opening a connection
back to libvirtd using virConnectOpen, as that creates a circular
dependancy. It also risks deadlock, because libvirtd will be holding
locks while starting up the container, and you're calling back into
the driver which may then end up acquiring the same lock.

<imran>:  This is where i am finding problem to code.  All the driver functions are static and to access them i might have to change the static to non-static.which will not be inline with current design. i don't see circular dependency with this approach as the internal connection is just used to get the fd's. please share any approach to handle this or hope i can keep the current code.

 
I think a better approach in general is for libvirtd  lxc_process.c
code to be responsible for getting access to all the namespace
file descriptors. We can then pass those pre-opened file descrpitors
down into libvirt_lxc using command line args, in the sme way that
we pass down file descriptors for pre-opened TAP devices.

eg so we end up running

 libvirt_lxc --netns 23 --pidns 24 --utsns 25

or something like that.

<imran>: And useability wise its easier to provide names instead of fds. as if the shared container goes down. the fds wont be valid.
 with name a simple restart and again get the new fds with names automatically.


 


> +                for (n = 0; n < ARRAY_CARDINALITY(ns); n++) {
> +                    if (STREQ(ns[n], ns_info_local[i].proc_name)) {
> +                        ns_fd[i] = fdlist[n];
> +                    } else {
> +                        if (VIR_CLOSE(fdlist[n]) < 0)
> +                           VIR_ERROR(_("failed to close fd. ignoring.."));
> +                    }
> +                }
> +                if (nfdlist > 0)
> +                    VIR_FREE(fdlist);
> +            } else {
> +                if (virAsprintf(&path, "/proc/%d/ns/%s", pid, ns_info_local[i].proc_name) < 0)
> +                    return -1;
> +                ns_fd[i] = open(path, O_RDONLY);
> +                VIR_FREE(path);
> +                if (ns_fd[i] < 0) {
> +                    virReportSystemError(errno,
> +                      _("failed to open ns %s"), lxcDef->ns_val[i]);
> +                    return -1;
> +                }
> +            }
> +        }
> +    }
> + cleanup:
> +    if (dom)
> +        virDomainFree(dom);
> +    if (conn)
> +        virConnectClose(conn);
> +    return rc;
> +}

Regards,
Daniel
--
|: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org              -o-             http://virt-manager.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org       -o-       http://live.gnome.org/gtk-vnc :|

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list

[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]