Re: [PATCH] conntrackd: make conntrackd namespace aware

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Aug 31, 2012 at 6:11 PM, Ansis Atteka <aatteka@xxxxxxxxxx> wrote:
> This patch allows conntrackd to open CT Netlink sockets into a given
> network namespace. Channel sockets (e.g. UDP) would still be opened into
> the same namespace where conntrackd was started.
>
> The only binary this patch affects is conntrackd. All other binaries (e.g.
> conntrack, nfct) would still operate in the same namespace where they were
> started.
>
> To make use of this patch:
> 1. create a network namespace: "ip netns add the_ns"
> 2. add "NetlinkNamespace /var/run/netns/the_ns" line to the conntrackd.conf
> file inside General {...} section.

I wanted to provide more details about this patch and also bump it
for attention.

Basically, it allows conntrackd to open Conntrack Netlink sockets in a
different namespace from the one where the channel sockets are
opened. This isolation benefits:
1. security, because the channel socket (and management interface) will
reside in a different namespace. They won't be exposed to the traffic
that traverses the namespace;
2. flexibility, because arbitrary IP addresses could be used inside that
namespace for Connection Tracking purposes. No need to worry that
there might be overlapping IP addresses with the Management interface;
3. scalability w.r.t. namespaces, because all the namespaces would
end up using a single management interface and IP address in the root
namespace. There would be no need to maintain a dedicated
management interface and IP address inside every namespace.

This patch would also lay the groundwork for my next patches, which would
make connection state synchronization for virtualized networks even easier:
1. allow single conntrackd instance to synchronize multiple namespaces;
2. add configuration dynamically to conntrackd (without restarting
the daemon).

Those other two patches involve quite a lot of code refactoring and
depend on this patch. So comments are welcome!

Thanks,
Ansis

>
> Signed-off-by: Ansis Atteka <aatteka@xxxxxxxxxx>
> ---
>  include/Makefile.am   |    2 +-
>  include/conntrackd.h  |    2 +
>  include/namespace.h   |   12 ++++++
>  src/Makefile.am       |    3 +-
>  src/cthelper.c        |    3 +-
>  src/ctnl.c            |   20 +++++++--
>  src/expect.c          |    4 +-
>  src/external_inject.c |    3 +-
>  src/internal_bypass.c |    5 ++-
>  src/namespace.c       |  112 +++++++++++++++++++++++++++++++++++++++++++++++++
>  src/netlink.c         |    6 ++-
>  src/read_config_lex.l |    1 +
>  src/read_config_yy.y  |    8 +++-
>  src/run.c             |    3 ++
>  src/sync-mode.c       |    4 +-
>  src/sync-notrack.c    |    3 +-
>  16 files changed, 175 insertions(+), 16 deletions(-)
>  create mode 100644 include/namespace.h
>  create mode 100644 src/namespace.c
>
> diff --git a/include/Makefile.am b/include/Makefile.am
> index 6bd0f7f..e06fd4d 100644
> --- a/include/Makefile.am
> +++ b/include/Makefile.am
> @@ -6,5 +6,5 @@ noinst_HEADERS = alarm.h jhash.h cache.h linux_list.h linux_rbtree.h \
>                  network.h filter.h queue.h vector.h cidr.h \
>                  traffic_stats.h netlink.h fds.h event.h bitops.h channel.h \
>                  process.h origin.h internal.h external.h date.h nfct.h \
> -                helper.h myct.h stack.h
> +                helper.h myct.h stack.h namespace.h
>
> diff --git a/include/conntrackd.h b/include/conntrackd.h
> index 19e613c..c349d72 100644
> --- a/include/conntrackd.h
> +++ b/include/conntrackd.h
> @@ -94,6 +94,7 @@ struct ct_conf {
>         int channel_type_global;
>         struct channel_conf channel[MULTICHANNEL_MAX];
>         struct local_conf local;        /* unix socket facilities */
> +       char netlink_namespace[FILENAME_MAXLEN];
>         int nice;
>         int limit;
>         int refresh;
> @@ -143,6 +144,7 @@ struct ct_conf {
>  #define STATE(x) st.x
>
>  struct ct_general_state {
> +       int                                     ns_fd;          /* namespace fd for NL sockets */
>         sigset_t                        block;
>         FILE                            *log;
>         FILE                            *stats_log;
> diff --git a/include/namespace.h b/include/namespace.h
> new file mode 100644
> index 0000000..668a270
> --- /dev/null
> +++ b/include/namespace.h
> @@ -0,0 +1,12 @@
> +#ifndef _NAMESPACE_H_
> +#define _NAMESPACE_H_
> +
> +#include <libmnl/libmnl.h>
> +#include <libnetfilter_conntrack/libnetfilter_conntrack.h>
> +
> +void init_namespaces(void);
> +
> +struct nfct_handle *nfct_open_ns(u_int8_t, unsigned, int);
> +struct mnl_socket *mnl_socket_open_ns(int, int);
> +
> +#endif
> diff --git a/src/Makefile.am b/src/Makefile.am
> index d8074d2..60314ab 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -39,7 +39,8 @@ conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \
>                     external_cache.c external_inject.c \
>                     internal_cache.c internal_bypass.c \
>                     read_config_yy.y read_config_lex.l \
> -                   stack.c helpers.c utils.c expect.c
> +                   stack.c helpers.c utils.c expect.c \
> +                   namespace.c
>
>  # yacc and lex generate dirty code
>  read_config_yy.o read_config_lex.o: AM_CFLAGS += -Wno-missing-prototypes -Wno-missing-declarations -Wno-implicit-function-declaration -Wno-nested-externs -Wno-undef -Wno-redundant-decls
> diff --git a/src/cthelper.c b/src/cthelper.c
> index c119869..b165900 100644
> --- a/src/cthelper.c
> +++ b/src/cthelper.c
> @@ -22,6 +22,7 @@
>  #include "log.h"
>  #include "fds.h"
>  #include "helper.h"
> +#include "namespace.h"
>
>  #include <unistd.h>
>  #include <fcntl.h>
> @@ -493,7 +494,7 @@ int cthelper_init(void)
>                 return -1;
>         }
>
> -       STATE_CTH(nl) = mnl_socket_open(NETLINK_NETFILTER);
> +       STATE_CTH(nl) = mnl_socket_open_ns(NETLINK_NETFILTER, STATE(ns_fd));
>         if (STATE_CTH(nl) == NULL) {
>                 dlog(LOG_ERR, "cannot open nfq socket");
>                 return -1;
> diff --git a/src/ctnl.c b/src/ctnl.c
> index bb54727..65784c3 100644
> --- a/src/ctnl.c
> +++ b/src/ctnl.c
> @@ -29,6 +29,7 @@
>  #include "origin.h"
>  #include "date.h"
>  #include "internal.h"
> +#include "namespace.h"
>
>  #include <errno.h>
>  #include <signal.h>
> @@ -399,6 +400,17 @@ static void poll_cb(void *data)
>
>  int ctnl_init(void)
>  {
> +       if (CONFIG(netlink_namespace)[0]) {
> +               STATE(ns_fd) = open(CONFIG(netlink_namespace), O_RDONLY);
> +               if (STATE(ns_fd) == -1) {
> +                       dlog(LOG_ERR, "could not open network namespace %s: %s",
> +                                CONFIG(netlink_namespace), strerror(errno));
> +                       return -1;
> +               }
> +       } else {
> +               STATE(ns_fd) = -1;
> +       }
> +
>         if (CONFIG(flags) & CTD_STATS_MODE)
>                 STATE(mode) = &stats_mode;
>         else if (CONFIG(flags) & CTD_SYNC_MODE)
> @@ -417,7 +429,7 @@ int ctnl_init(void)
>         }
>
>         /* resynchronize (like 'dump' socket) but it also purges old entries */
> -       STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       STATE(resync) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>         if (STATE(resync)== NULL) {
>                 dlog(LOG_ERR, "can't open netlink handler: %s",
>                      strerror(errno));
> @@ -438,7 +450,7 @@ int ctnl_init(void)
>         fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK);
>
>         if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) {
> -               STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0);
> +               STATE(dump) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>                 if (STATE(dump) == NULL) {
>                         dlog(LOG_ERR, "can't open netlink handler: %s",
>                              strerror(errno));
> @@ -467,7 +479,7 @@ int ctnl_init(void)
>                 }
>         }
>
> -       STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       STATE(get) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>         if (STATE(get) == NULL) {
>                 dlog(LOG_ERR, "can't open netlink handler: %s",
>                      strerror(errno));
> @@ -481,7 +493,7 @@ int ctnl_init(void)
>                                         exp_get_handler, NULL);
>         }
>
> -       STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       STATE(flush) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>         if (STATE(flush) == NULL) {
>                 dlog(LOG_ERR, "cannot open flusher handler");
>                 return -1;
> diff --git a/src/expect.c b/src/expect.c
> index 6069770..bedec2c 100644
> --- a/src/expect.c
> +++ b/src/expect.c
> @@ -8,7 +8,9 @@
>   * This code has been sponsored by Vyatta Inc. <http://www.vyatta.com>
>   */
>
> +#include "conntrackd.h"
>  #include "helper.h"
> +#include "namespace.h"
>
>  #include <stdio.h>
>  #include <string.h>
> @@ -165,7 +167,7 @@ static int cthelper_expect_cmd(struct nf_expect *exp, int cmd)
>         int ret;
>         struct nfct_handle *h;
>
> -       h = nfct_open(EXPECT, 0);
> +       h = nfct_open_ns(EXPECT, 0, STATE(ns_fd));
>         if (!h)
>                 return -1;
>
> diff --git a/src/external_inject.c b/src/external_inject.c
> index 0ad3478..5a6680b 100644
> --- a/src/external_inject.c
> +++ b/src/external_inject.c
> @@ -22,6 +22,7 @@
>  #include "cache.h"
>  #include "origin.h"
>  #include "external.h"
> +#include "namespace.h"
>  #include "netlink.h"
>
>  #include <libnetfilter_conntrack/libnetfilter_conntrack.h>
> @@ -42,7 +43,7 @@ struct {
>  static int external_inject_init(void)
>  {
>         /* handler to directly inject conntracks into kernel-space */
> -       inject = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       inject = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>         if (inject == NULL) {
>                 dlog(LOG_ERR, "can't open netlink handler: %s",
>                      strerror(errno));
> diff --git a/src/internal_bypass.c b/src/internal_bypass.c
> index 1194339..520c55d 100644
> --- a/src/internal_bypass.c
> +++ b/src/internal_bypass.c
> @@ -16,6 +16,7 @@
>  #include "netlink.h"
>  #include "network.h"
>  #include "origin.h"
> +#include "namespace.h"
>
>  static int internal_bypass_init(void)
>  {
> @@ -52,7 +53,7 @@ static void internal_bypass_ct_dump(int fd, int type)
>         u_int32_t family = AF_UNSPEC;
>         int ret;
>
> -       h = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>         if (h == NULL) {
>                 dlog(LOG_ERR, "can't allocate memory for the internal cache");
>                 return;
> @@ -183,7 +184,7 @@ static void internal_bypass_exp_dump(int fd, int type)
>         u_int32_t family = AF_UNSPEC;
>         int ret;
>
> -       h = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>         if (h == NULL) {
>                 dlog(LOG_ERR, "can't allocate memory for the internal cache");
>                 return;
> diff --git a/src/namespace.c b/src/namespace.c
> new file mode 100644
> index 0000000..03db222
> --- /dev/null
> +++ b/src/namespace.c
> @@ -0,0 +1,112 @@
> +/*
> + * Copyright (C) 2012 Nicira, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
> + *
> + * Authors:
> + *     Ansis Atteka <aatteka@xxxxxxxxxx>
> + */
> +#define _GNU_SOURCE
> +
> +#include "namespace.h"
> +
> +#include <fcntl.h>
> +#include <sched.h>
> +#include <sys/syscall.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <unistd.h>
> +
> +#include "conntrackd.h"
> +#include "log.h"
> +
> +#ifndef CLONE_NEWNET
> +#define CLONE_NEWNET 0x40000000
> +#endif
> +
> +#ifndef __NR_setns
> +#ifdef __x86_64
> +#define __NR_setns 308
> +#endif
> +#ifdef __i386
> +#define __NR_setns 346
> +#endif
> +#endif
> +
> +#ifdef __NR_setns
> +
> +static int root_fd = -1;
> +
> +void init_namespaces(void) {
> +       root_fd = open("/proc/self/ns/net", O_RDONLY);
> +       if (root_fd == -1) {
> +               dlog(LOG_WARNING, "could not open current namespace");
> +       }
> +}
> +
> +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions,
> +                                                                int ns_fd) {
> +       struct nfct_handle *handle = NULL;
> +
> +       if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) {
> +               dlog(LOG_ERR, "could not switch between namespaces");
> +       } else {
> +               handle = nfct_open(subsys_id, subscriptions);
> +               if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) {
> +                       dlog(LOG_ERR, "fatal: could not switch back to main namespace");
> +               }
> +       }
> +       return handle;
> +}
> +
> +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) {
> +       struct mnl_socket *handle = NULL;
> +
> +       if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) {
> +               dlog(LOG_ERR, "could not switch between namespaces");
> +       } else {
> +               handle = mnl_socket_open(bus);
> +               if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) {
> +                       dlog(LOG_ERR, "fatal: could not switch back to main namespace");
> +               }
> +       }
> +       return handle;
> +}
> +
> +#else
> +
> +void init_namespaces(void) {
> +}
> +
> +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions,
> +                                                                int ns_fd) {
> +       if (ns_fd == -1) {
> +               return nfct_open(subsys_id, subscriptions);
> +       } else {
> +               dlog(LOG_ERR, "network namespaces are not supported on this system");
> +               return NULL;
> +       }
> +}
> +
> +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) {
> +       if (ns_fd == -1) {
> +               return mnl_socket_open(bus);
> +       } else {
> +               dlog(LOG_ERR, "network namespaces are not supported on this system");
> +               return NULL;
> +       }
> +}
> +
> +#endif
> diff --git a/src/netlink.c b/src/netlink.c
> index bd38d99..c71b463 100644
> --- a/src/netlink.c
> +++ b/src/netlink.c
> @@ -21,6 +21,7 @@
>  #include "conntrackd.h"
>  #include "filter.h"
>  #include "log.h"
> +#include "namespace.h"
>
>  #include <string.h>
>  #include <errno.h>
> @@ -33,7 +34,8 @@ struct nfct_handle *nl_init_event_handler(void)
>  {
>         struct nfct_handle *h;
>
> -       h = nfct_open(CONFIG(netlink).subsys_id, CONFIG(netlink).groups);
> +       h = nfct_open_ns(CONFIG(netlink).subsys_id, CONFIG(netlink).groups,
> +                                        STATE(ns_fd));
>         if (h == NULL)
>                 return NULL;
>
> @@ -175,7 +177,7 @@ int nl_flush_conntrack_table_selective(void)
>         struct nfct_handle *h;
>         int ret;
>
> -       h = nfct_open(CONNTRACK, 0);
> +       h = nfct_open_ns(CONNTRACK, 0, STATE(ns_fd));
>         if (h == NULL) {
>                 dlog(LOG_ERR, "cannot open handle");
>                 return -1;
> diff --git a/src/read_config_lex.l b/src/read_config_lex.l
> index 31fa32e..7a09c52 100644
> --- a/src/read_config_lex.l
> +++ b/src/read_config_lex.l
> @@ -91,6 +91,7 @@ notrack               [N|n][O|o][T|t][R|r][A|a][C|c][K|k]
>  "SocketBufferSizeMaxGrowth"    { return T_BUFFER_SIZE_MAX_GROWN; /* alias */ }
>  "NetlinkBufferSize"            { return T_BUFFER_SIZE; }
>  "NetlinkBufferSizeMaxGrowth"   { return T_BUFFER_SIZE_MAX_GROWN; }
> +"NetlinkNamespace"             { return T_NETLINK_NAMESPACE; }
>  "Mode"                         { return T_SYNC_MODE; }
>  "ListenTo"                     { return T_LISTEN_TO; }
>  "Family"                       { return T_FAMILY; }
> diff --git a/src/read_config_yy.y b/src/read_config_yy.y
> index c9235d3..b0985e7 100644
> --- a/src/read_config_yy.y
> +++ b/src/read_config_yy.y
> @@ -87,7 +87,7 @@ enum {
>  %token T_DISABLE_INTERNAL_CACHE T_DISABLE_EXTERNAL_CACHE T_ERROR_QUEUE_LENGTH
>  %token T_OPTIONS T_TCP_WINDOW_TRACKING T_EXPECT_SYNC
>  %token T_HELPER T_HELPER_QUEUE_NUM T_HELPER_POLICY T_HELPER_EXPECT_MAX
> -%token T_HELPER_EXPECT_TIMEOUT
> +%token T_HELPER_EXPECT_TIMEOUT T_NETLINK_NAMESPACE
>
>  %token <string> T_IP T_PATH_VAL
>  %token <val> T_NUMBER
> @@ -1113,6 +1113,7 @@ general_line: hashsize
>             | unix_line
>             | netlink_buffer_size
>             | netlink_buffer_size_max_grown
> +           | netlink_namespace
>             | family
>             | event_iterations_limit
>             | poll_secs
> @@ -1158,6 +1159,11 @@ netlink_events_reliable : T_NETLINK_EVENTS_RELIABLE T_OFF
>         conf.netlink.events_reliable = 0;
>  };
>
> +netlink_namespace : T_NETLINK_NAMESPACE T_PATH_VAL
> +{
> +       strncpy(conf.netlink_namespace, $2, FILENAME_MAXLEN);
> +};
> +
>  nice : T_NICE T_SIGNED_NUMBER
>  {
>         conf.nice = $2;
> diff --git a/src/run.c b/src/run.c
> index 3337694..7aa5032 100644
> --- a/src/run.c
> +++ b/src/run.c
> @@ -30,6 +30,7 @@
>  #include "origin.h"
>  #include "date.h"
>  #include "internal.h"
> +#include "namespace.h"
>
>  #include <errno.h>
>  #include <signal.h>
> @@ -252,6 +253,8 @@ init(void)
>                 return -1;
>
>         /* Initialization */
> +       init_namespaces();
> +
>         if (CONFIG(flags) & (CTD_SYNC_MODE | CTD_STATS_MODE))
>                 if (ctnl_init() < 0)
>                         return -1;
> diff --git a/src/sync-mode.c b/src/sync-mode.c
> index e69ecfe..de58b8c 100644
> --- a/src/sync-mode.c
> +++ b/src/sync-mode.c
> @@ -31,6 +31,7 @@
>  #include "origin.h"
>  #include "internal.h"
>  #include "external.h"
> +#include "namespace.h"
>
>  #include <errno.h>
>  #include <unistd.h>
> @@ -451,7 +452,8 @@ static int init_sync(void)
>                         tx_queue_cb, NULL, STATE(fds)) == -1)
>                 return -1;
>
> -       STATE_SYNC(commit).h = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       STATE_SYNC(commit).h = nfct_open_ns(CONFIG(netlink).subsys_id, 0,
> +                                                                               STATE(ns_fd));
>         if (STATE_SYNC(commit).h == NULL) {
>                 dlog(LOG_ERR, "can't create handler to commit");
>                 return -1;
> diff --git a/src/sync-notrack.c b/src/sync-notrack.c
> index a7df4e7..d8010fb 100644
> --- a/src/sync-notrack.c
> +++ b/src/sync-notrack.c
> @@ -24,6 +24,7 @@
>  #include "log.h"
>  #include "cache.h"
>  #include "fds.h"
> +#include "namespace.h"
>
>  #include <string.h>
>
> @@ -102,7 +103,7 @@ static void kernel_resync(void)
>         u_int32_t family = AF_UNSPEC;
>         int ret;
>
> -       h = nfct_open(CONFIG(netlink).subsys_id, 0);
> +       h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd));
>         if (h == NULL) {
>                 dlog(LOG_ERR, "can't allocate memory for the internal cache");
>                 return;
> --
> 1.7.9.5
>
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Netfilter Users]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux