Hi Ansis, On Fri, Aug 31, 2012 at 06:11:55PM -0700, Ansis Atteka wrote: > This patch allows conntrackd to open CT Netlink sockets into a given > network namespace. Channel sockets (e.g. UDP) would still be opened into > the same namespace where conntrackd was started. > > The only binary this patch affects is conntrackd. All other binaries (e.g. > conntrack, nfct) would still operate in the same namespace where they were > started. > > To make use of this patch: > 1. create a network namespace: "ip netns add the_ns" > 2. add "NetlinkNamespace /var/run/netns/the_ns" line to the conntrackd.conf > file inside General {...} section. > > Signed-off-by: Ansis Atteka <aatteka@xxxxxxxxxx> > --- > include/Makefile.am | 2 +- > include/conntrackd.h | 2 + > include/namespace.h | 12 ++++++ > src/Makefile.am | 3 +- > src/cthelper.c | 3 +- > src/ctnl.c | 20 +++++++-- > src/expect.c | 4 +- > src/external_inject.c | 3 +- > src/internal_bypass.c | 5 ++- > src/namespace.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++ > src/netlink.c | 6 ++- > src/read_config_lex.l | 1 + > src/read_config_yy.y | 8 +++- > src/run.c | 3 ++ > src/sync-mode.c | 4 +- > src/sync-notrack.c | 3 +- > 16 files changed, 175 insertions(+), 16 deletions(-) > create mode 100644 include/namespace.h > create mode 100644 src/namespace.c > > diff --git a/include/Makefile.am b/include/Makefile.am > index 6bd0f7f..e06fd4d 100644 > --- a/include/Makefile.am > +++ b/include/Makefile.am > @@ -6,5 +6,5 @@ noinst_HEADERS = alarm.h jhash.h cache.h linux_list.h linux_rbtree.h \ > network.h filter.h queue.h vector.h cidr.h \ > traffic_stats.h netlink.h fds.h event.h bitops.h channel.h \ > process.h origin.h internal.h external.h date.h nfct.h \ > - helper.h myct.h stack.h > + helper.h myct.h stack.h namespace.h > > diff --git a/include/conntrackd.h b/include/conntrackd.h > index 19e613c..c349d72 100644 > --- a/include/conntrackd.h > +++ b/include/conntrackd.h > @@ -94,6 +94,7 @@ struct ct_conf { > int channel_type_global; > struct channel_conf channel[MULTICHANNEL_MAX]; > struct local_conf local; /* unix socket facilities */ > + char netlink_namespace[FILENAME_MAXLEN]; > int nice; > int limit; > int refresh; > @@ -143,6 +144,7 @@ struct ct_conf { > #define STATE(x) st.x > > struct ct_general_state { > + int ns_fd; /* namespace fd for NL sockets */ We use the same coding style of the Linux kernel. Please, break lines at 80 chars. > sigset_t block; > FILE *log; > FILE *stats_log; > diff --git a/include/namespace.h b/include/namespace.h > new file mode 100644 > index 0000000..668a270 > --- /dev/null > +++ b/include/namespace.h > @@ -0,0 +1,12 @@ > +#ifndef _NAMESPACE_H_ > +#define _NAMESPACE_H_ > + > +#include <libmnl/libmnl.h> > +#include <libnetfilter_conntrack/libnetfilter_conntrack.h> > + > +void init_namespaces(void); > + > +struct nfct_handle *nfct_open_ns(u_int8_t, unsigned, int); > +struct mnl_socket *mnl_socket_open_ns(int, int); > + > +#endif > diff --git a/src/Makefile.am b/src/Makefile.am > index d8074d2..60314ab 100644 > --- a/src/Makefile.am > +++ b/src/Makefile.am > @@ -39,7 +39,8 @@ conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \ > external_cache.c external_inject.c \ > internal_cache.c internal_bypass.c \ > read_config_yy.y read_config_lex.l \ > - stack.c helpers.c utils.c expect.c > + stack.c helpers.c utils.c expect.c \ > + namespace.c > > # yacc and lex generate dirty code > read_config_yy.o read_config_lex.o: AM_CFLAGS += -Wno-missing-prototypes -Wno-missing-declarations -Wno-implicit-function-declaration -Wno-nested-externs -Wno-undef -Wno-redundant-decls > diff --git a/src/cthelper.c b/src/cthelper.c > index c119869..b165900 100644 > --- a/src/cthelper.c > +++ b/src/cthelper.c > @@ -22,6 +22,7 @@ > #include "log.h" > #include "fds.h" > #include "helper.h" > +#include "namespace.h" > > #include <unistd.h> > #include <fcntl.h> > @@ -493,7 +494,7 @@ int cthelper_init(void) > return -1; > } > > - STATE_CTH(nl) = mnl_socket_open(NETLINK_NETFILTER); > + STATE_CTH(nl) = mnl_socket_open_ns(NETLINK_NETFILTER, STATE(ns_fd)); > if (STATE_CTH(nl) == NULL) { > dlog(LOG_ERR, "cannot open nfq socket"); > return -1; > diff --git a/src/ctnl.c b/src/ctnl.c > index bb54727..65784c3 100644 > --- a/src/ctnl.c > +++ b/src/ctnl.c > @@ -29,6 +29,7 @@ > #include "origin.h" > #include "date.h" > #include "internal.h" > +#include "namespace.h" > > #include <errno.h> > #include <signal.h> > @@ -399,6 +400,17 @@ static void poll_cb(void *data) > > int ctnl_init(void) > { > + if (CONFIG(netlink_namespace)[0]) { > + STATE(ns_fd) = open(CONFIG(netlink_namespace), O_RDONLY); > + if (STATE(ns_fd) == -1) { > + dlog(LOG_ERR, "could not open network namespace %s: %s", > + CONFIG(netlink_namespace), strerror(errno)); > + return -1; > + } > + } else { > + STATE(ns_fd) = -1; > + } > + > if (CONFIG(flags) & CTD_STATS_MODE) > STATE(mode) = &stats_mode; > else if (CONFIG(flags) & CTD_SYNC_MODE) > @@ -417,7 +429,7 @@ int ctnl_init(void) > } > > /* resynchronize (like 'dump' socket) but it also purges old entries */ > - STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0); > + STATE(resync) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (STATE(resync)== NULL) { > dlog(LOG_ERR, "can't open netlink handler: %s", > strerror(errno)); > @@ -438,7 +450,7 @@ int ctnl_init(void) > fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK); > > if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { > - STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0); > + STATE(dump) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (STATE(dump) == NULL) { > dlog(LOG_ERR, "can't open netlink handler: %s", > strerror(errno)); > @@ -467,7 +479,7 @@ int ctnl_init(void) > } > } > > - STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0); > + STATE(get) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (STATE(get) == NULL) { > dlog(LOG_ERR, "can't open netlink handler: %s", > strerror(errno)); > @@ -481,7 +493,7 @@ int ctnl_init(void) > exp_get_handler, NULL); > } > > - STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0); > + STATE(flush) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (STATE(flush) == NULL) { > dlog(LOG_ERR, "cannot open flusher handler"); > return -1; > diff --git a/src/expect.c b/src/expect.c > index 6069770..bedec2c 100644 > --- a/src/expect.c > +++ b/src/expect.c > @@ -8,7 +8,9 @@ > * This code has been sponsored by Vyatta Inc. <http://www.vyatta.com> > */ > > +#include "conntrackd.h" > #include "helper.h" > +#include "namespace.h" > > #include <stdio.h> > #include <string.h> > @@ -165,7 +167,7 @@ static int cthelper_expect_cmd(struct nf_expect *exp, int cmd) > int ret; > struct nfct_handle *h; > > - h = nfct_open(EXPECT, 0); > + h = nfct_open_ns(EXPECT, 0, STATE(ns_fd)); > if (!h) > return -1; > > diff --git a/src/external_inject.c b/src/external_inject.c > index 0ad3478..5a6680b 100644 > --- a/src/external_inject.c > +++ b/src/external_inject.c > @@ -22,6 +22,7 @@ > #include "cache.h" > #include "origin.h" > #include "external.h" > +#include "namespace.h" > #include "netlink.h" > > #include <libnetfilter_conntrack/libnetfilter_conntrack.h> > @@ -42,7 +43,7 @@ struct { > static int external_inject_init(void) > { > /* handler to directly inject conntracks into kernel-space */ > - inject = nfct_open(CONFIG(netlink).subsys_id, 0); > + inject = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (inject == NULL) { > dlog(LOG_ERR, "can't open netlink handler: %s", > strerror(errno)); > diff --git a/src/internal_bypass.c b/src/internal_bypass.c > index 1194339..520c55d 100644 > --- a/src/internal_bypass.c > +++ b/src/internal_bypass.c > @@ -16,6 +16,7 @@ > #include "netlink.h" > #include "network.h" > #include "origin.h" > +#include "namespace.h" > > static int internal_bypass_init(void) > { > @@ -52,7 +53,7 @@ static void internal_bypass_ct_dump(int fd, int type) > u_int32_t family = AF_UNSPEC; > int ret; > > - h = nfct_open(CONFIG(netlink).subsys_id, 0); > + h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (h == NULL) { > dlog(LOG_ERR, "can't allocate memory for the internal cache"); > return; > @@ -183,7 +184,7 @@ static void internal_bypass_exp_dump(int fd, int type) > u_int32_t family = AF_UNSPEC; > int ret; > > - h = nfct_open(CONFIG(netlink).subsys_id, 0); > + h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); > if (h == NULL) { > dlog(LOG_ERR, "can't allocate memory for the internal cache"); > return; > diff --git a/src/namespace.c b/src/namespace.c > new file mode 100644 > index 0000000..03db222 > --- /dev/null > +++ b/src/namespace.c > @@ -0,0 +1,112 @@ > +/* > + * Copyright (C) 2012 Nicira, Inc. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. > + * > + * Authors: > + * Ansis Atteka <aatteka@xxxxxxxxxx> > + */ > +#define _GNU_SOURCE > + > +#include "namespace.h" > + > +#include <fcntl.h> > +#include <sched.h> > +#include <sys/syscall.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <unistd.h> > + > +#include "conntrackd.h" > +#include "log.h" > + > +#ifndef CLONE_NEWNET > +#define CLONE_NEWNET 0x40000000 > +#endif > + > +#ifndef __NR_setns > +#ifdef __x86_64 > +#define __NR_setns 308 > +#endif > +#ifdef __i386 > +#define __NR_setns 346 > +#endif > +#endif I can see there's some setns wrapper since glibc 2.14 that will make this portable. What I would try is to check in configure.ac if setns() exists, if so use it. Otherwise, check for __NR_setns, if so define setns yourself in this new namespace.c file. If neither setns() nor __NR_setns exists, then define setns() to return success and print display error and exist if NetlinkNamespace is used. > + > +#ifdef __NR_setns > + > +static int root_fd = -1; This root_fd should go to STATE(ns).root_fd: struct ct_state { ... struct { int cur_fd; /* current namespace */ int root_fd; /* root namespace */ } ns; > + > +void init_namespaces(void) { > + root_fd = open("/proc/self/ns/net", O_RDONLY); > + if (root_fd == -1) { > + dlog(LOG_WARNING, "could not open current namespace"); > + } > +} > + > +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions, > + int ns_fd) { > + struct nfct_handle *handle = NULL; > + > + if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { > + dlog(LOG_ERR, "could not switch between namespaces"); > + } else { > + handle = nfct_open(subsys_id, subscriptions); > + if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { > + dlog(LOG_ERR, "fatal: could not switch back to main namespace"); > + } > + } Oh, this code above is confusing, please make it more readable, like this: if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { dlog(LOG_ERR, "could not switch between namespaces"); return NULL; } handle = nfct_open(subsys_id, subscriptions); if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { dlog(LOG_ERR, "could not switch back to main namespace"); } And you encapsulate part of that code in some new function like: int ctd_change_namespace(int fd) { if (fd != -1) return 0; if (syscall(__NR_setns, fd, CLONE_NEWNET)) { dlog(LOG_ERR, "could not switch back to main namespace"); return -1; } return 0; } That you can reuse over and over again. > + return handle; > +} > + > +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) { > + struct mnl_socket *handle = NULL; > + > + if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { > + dlog(LOG_ERR, "could not switch between namespaces"); > + } else { > + handle = mnl_socket_open(bus); > + if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { > + dlog(LOG_ERR, "fatal: could not switch back to main namespace"); > + } > + } > + return handle; Same thing for this function above. > +} > + > +#else > + > +void init_namespaces(void) { > +} > + > +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions, > + int ns_fd) { > + if (ns_fd == -1) { > + return nfct_open(subsys_id, subscriptions); > + } else { > + dlog(LOG_ERR, "network namespaces are not supported on this system"); > + return NULL; > + } > +} > + > +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) { > + if (ns_fd == -1) { > + return mnl_socket_open(bus); > + } else { > + dlog(LOG_ERR, "network namespaces are not supported on this system"); > + return NULL; > + } > +} You can remove this conditional code above since the #else if you do something like: int ctd_change_namespace(int fd) { #ifdef SETNS_EXISTS if (fd != -1) return 0; if (syscall(__NR_setns, fd, CLONE_NEWNET)) { dlog(LOG_ERR, "could not switch back to main namespace"); return -1; } #endif return 0; } That's all by now. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html