On Thu, Sep 6, 2012 at 10:02 AM, Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx> wrote: > Hi Ansis, > > On Fri, Aug 31, 2012 at 06:11:55PM -0700, Ansis Atteka wrote: >> This patch allows conntrackd to open CT Netlink sockets into a given >> network namespace. Channel sockets (e.g. UDP) would still be opened into >> the same namespace where conntrackd was started. >> >> The only binary this patch affects is conntrackd. All other binaries (e.g. >> conntrack, nfct) would still operate in the same namespace where they were >> started. >> >> To make use of this patch: >> 1. create a network namespace: "ip netns add the_ns" >> 2. add "NetlinkNamespace /var/run/netns/the_ns" line to the conntrackd.conf >> file inside General {...} section. >> >> Signed-off-by: Ansis Atteka <aatteka@xxxxxxxxxx> >> --- >> include/Makefile.am | 2 +- >> include/conntrackd.h | 2 + >> include/namespace.h | 12 ++++++ >> src/Makefile.am | 3 +- >> src/cthelper.c | 3 +- >> src/ctnl.c | 20 +++++++-- >> src/expect.c | 4 +- >> src/external_inject.c | 3 +- >> src/internal_bypass.c | 5 ++- >> src/namespace.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++ >> src/netlink.c | 6 ++- >> src/read_config_lex.l | 1 + >> src/read_config_yy.y | 8 +++- >> src/run.c | 3 ++ >> src/sync-mode.c | 4 +- >> src/sync-notrack.c | 3 +- >> 16 files changed, 175 insertions(+), 16 deletions(-) >> create mode 100644 include/namespace.h >> create mode 100644 src/namespace.c >> >> diff --git a/include/Makefile.am b/include/Makefile.am >> index 6bd0f7f..e06fd4d 100644 >> --- a/include/Makefile.am >> +++ b/include/Makefile.am >> @@ -6,5 +6,5 @@ noinst_HEADERS = alarm.h jhash.h cache.h linux_list.h linux_rbtree.h \ >> network.h filter.h queue.h vector.h cidr.h \ >> traffic_stats.h netlink.h fds.h event.h bitops.h channel.h \ >> process.h origin.h internal.h external.h date.h nfct.h \ >> - helper.h myct.h stack.h >> + helper.h myct.h stack.h namespace.h >> >> diff --git a/include/conntrackd.h b/include/conntrackd.h >> index 19e613c..c349d72 100644 >> --- a/include/conntrackd.h >> +++ b/include/conntrackd.h >> @@ -94,6 +94,7 @@ struct ct_conf { >> int channel_type_global; >> struct channel_conf channel[MULTICHANNEL_MAX]; >> struct local_conf local; /* unix socket facilities */ >> + char netlink_namespace[FILENAME_MAXLEN]; >> int nice; >> int limit; >> int refresh; >> @@ -143,6 +144,7 @@ struct ct_conf { >> #define STATE(x) st.x >> >> struct ct_general_state { >> + int ns_fd; /* namespace fd for NL sockets */ > > We use the same coding style of the Linux kernel. Please, break lines > at 80 chars. Ok. Forgot that tabs are treated as 8 characters instead of 4. > >> sigset_t block; >> FILE *log; >> FILE *stats_log; >> diff --git a/include/namespace.h b/include/namespace.h >> new file mode 100644 >> index 0000000..668a270 >> --- /dev/null >> +++ b/include/namespace.h >> @@ -0,0 +1,12 @@ >> +#ifndef _NAMESPACE_H_ >> +#define _NAMESPACE_H_ >> + >> +#include <libmnl/libmnl.h> >> +#include <libnetfilter_conntrack/libnetfilter_conntrack.h> >> + >> +void init_namespaces(void); >> + >> +struct nfct_handle *nfct_open_ns(u_int8_t, unsigned, int); >> +struct mnl_socket *mnl_socket_open_ns(int, int); >> + >> +#endif >> diff --git a/src/Makefile.am b/src/Makefile.am >> index d8074d2..60314ab 100644 >> --- a/src/Makefile.am >> +++ b/src/Makefile.am >> @@ -39,7 +39,8 @@ conntrackd_SOURCES = alarm.c main.c run.c hash.c queue.c rbtree.c \ >> external_cache.c external_inject.c \ >> internal_cache.c internal_bypass.c \ >> read_config_yy.y read_config_lex.l \ >> - stack.c helpers.c utils.c expect.c >> + stack.c helpers.c utils.c expect.c \ >> + namespace.c >> >> # yacc and lex generate dirty code >> read_config_yy.o read_config_lex.o: AM_CFLAGS += -Wno-missing-prototypes -Wno-missing-declarations -Wno-implicit-function-declaration -Wno-nested-externs -Wno-undef -Wno-redundant-decls >> diff --git a/src/cthelper.c b/src/cthelper.c >> index c119869..b165900 100644 >> --- a/src/cthelper.c >> +++ b/src/cthelper.c >> @@ -22,6 +22,7 @@ >> #include "log.h" >> #include "fds.h" >> #include "helper.h" >> +#include "namespace.h" >> >> #include <unistd.h> >> #include <fcntl.h> >> @@ -493,7 +494,7 @@ int cthelper_init(void) >> return -1; >> } >> >> - STATE_CTH(nl) = mnl_socket_open(NETLINK_NETFILTER); >> + STATE_CTH(nl) = mnl_socket_open_ns(NETLINK_NETFILTER, STATE(ns_fd)); >> if (STATE_CTH(nl) == NULL) { >> dlog(LOG_ERR, "cannot open nfq socket"); >> return -1; >> diff --git a/src/ctnl.c b/src/ctnl.c >> index bb54727..65784c3 100644 >> --- a/src/ctnl.c >> +++ b/src/ctnl.c >> @@ -29,6 +29,7 @@ >> #include "origin.h" >> #include "date.h" >> #include "internal.h" >> +#include "namespace.h" >> >> #include <errno.h> >> #include <signal.h> >> @@ -399,6 +400,17 @@ static void poll_cb(void *data) >> >> int ctnl_init(void) >> { >> + if (CONFIG(netlink_namespace)[0]) { >> + STATE(ns_fd) = open(CONFIG(netlink_namespace), O_RDONLY); >> + if (STATE(ns_fd) == -1) { >> + dlog(LOG_ERR, "could not open network namespace %s: %s", >> + CONFIG(netlink_namespace), strerror(errno)); >> + return -1; >> + } >> + } else { >> + STATE(ns_fd) = -1; >> + } >> + >> if (CONFIG(flags) & CTD_STATS_MODE) >> STATE(mode) = &stats_mode; >> else if (CONFIG(flags) & CTD_SYNC_MODE) >> @@ -417,7 +429,7 @@ int ctnl_init(void) >> } >> >> /* resynchronize (like 'dump' socket) but it also purges old entries */ >> - STATE(resync) = nfct_open(CONFIG(netlink).subsys_id, 0); >> + STATE(resync) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); >> if (STATE(resync)== NULL) { >> dlog(LOG_ERR, "can't open netlink handler: %s", >> strerror(errno)); >> @@ -438,7 +450,7 @@ int ctnl_init(void) >> fcntl(nfct_fd(STATE(resync)), F_SETFL, O_NONBLOCK); >> >> if (STATE(mode)->internal->flags & INTERNAL_F_POPULATE) { >> - STATE(dump) = nfct_open(CONFIG(netlink).subsys_id, 0); >> + STATE(dump) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); >> if (STATE(dump) == NULL) { >> dlog(LOG_ERR, "can't open netlink handler: %s", >> strerror(errno)); >> @@ -467,7 +479,7 @@ int ctnl_init(void) >> } >> } >> >> - STATE(get) = nfct_open(CONFIG(netlink).subsys_id, 0); >> + STATE(get) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); >> if (STATE(get) == NULL) { >> dlog(LOG_ERR, "can't open netlink handler: %s", >> strerror(errno)); >> @@ -481,7 +493,7 @@ int ctnl_init(void) >> exp_get_handler, NULL); >> } >> >> - STATE(flush) = nfct_open(CONFIG(netlink).subsys_id, 0); >> + STATE(flush) = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); >> if (STATE(flush) == NULL) { >> dlog(LOG_ERR, "cannot open flusher handler"); >> return -1; >> diff --git a/src/expect.c b/src/expect.c >> index 6069770..bedec2c 100644 >> --- a/src/expect.c >> +++ b/src/expect.c >> @@ -8,7 +8,9 @@ >> * This code has been sponsored by Vyatta Inc. <http://www.vyatta.com> >> */ >> >> +#include "conntrackd.h" >> #include "helper.h" >> +#include "namespace.h" >> >> #include <stdio.h> >> #include <string.h> >> @@ -165,7 +167,7 @@ static int cthelper_expect_cmd(struct nf_expect *exp, int cmd) >> int ret; >> struct nfct_handle *h; >> >> - h = nfct_open(EXPECT, 0); >> + h = nfct_open_ns(EXPECT, 0, STATE(ns_fd)); >> if (!h) >> return -1; >> >> diff --git a/src/external_inject.c b/src/external_inject.c >> index 0ad3478..5a6680b 100644 >> --- a/src/external_inject.c >> +++ b/src/external_inject.c >> @@ -22,6 +22,7 @@ >> #include "cache.h" >> #include "origin.h" >> #include "external.h" >> +#include "namespace.h" >> #include "netlink.h" >> >> #include <libnetfilter_conntrack/libnetfilter_conntrack.h> >> @@ -42,7 +43,7 @@ struct { >> static int external_inject_init(void) >> { >> /* handler to directly inject conntracks into kernel-space */ >> - inject = nfct_open(CONFIG(netlink).subsys_id, 0); >> + inject = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); >> if (inject == NULL) { >> dlog(LOG_ERR, "can't open netlink handler: %s", >> strerror(errno)); >> diff --git a/src/internal_bypass.c b/src/internal_bypass.c >> index 1194339..520c55d 100644 >> --- a/src/internal_bypass.c >> +++ b/src/internal_bypass.c >> @@ -16,6 +16,7 @@ >> #include "netlink.h" >> #include "network.h" >> #include "origin.h" >> +#include "namespace.h" >> >> static int internal_bypass_init(void) >> { >> @@ -52,7 +53,7 @@ static void internal_bypass_ct_dump(int fd, int type) >> u_int32_t family = AF_UNSPEC; >> int ret; >> >> - h = nfct_open(CONFIG(netlink).subsys_id, 0); >> + h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); >> if (h == NULL) { >> dlog(LOG_ERR, "can't allocate memory for the internal cache"); >> return; >> @@ -183,7 +184,7 @@ static void internal_bypass_exp_dump(int fd, int type) >> u_int32_t family = AF_UNSPEC; >> int ret; >> >> - h = nfct_open(CONFIG(netlink).subsys_id, 0); >> + h = nfct_open_ns(CONFIG(netlink).subsys_id, 0, STATE(ns_fd)); >> if (h == NULL) { >> dlog(LOG_ERR, "can't allocate memory for the internal cache"); >> return; >> diff --git a/src/namespace.c b/src/namespace.c >> new file mode 100644 >> index 0000000..03db222 >> --- /dev/null >> +++ b/src/namespace.c >> @@ -0,0 +1,112 @@ >> +/* >> + * Copyright (C) 2012 Nicira, Inc. >> + * >> + * This program is free software; you can redistribute it and/or modify >> + * it under the terms of the GNU General Public License as published by >> + * the Free Software Foundation; either version 2 of the License, or >> + * (at your option) any later version. >> + * >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + * >> + * You should have received a copy of the GNU General Public License >> + * along with this program; if not, write to the Free Software >> + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. >> + * >> + * Authors: >> + * Ansis Atteka <aatteka@xxxxxxxxxx> >> + */ >> +#define _GNU_SOURCE >> + >> +#include "namespace.h" >> + >> +#include <fcntl.h> >> +#include <sched.h> >> +#include <sys/syscall.h> >> +#include <sys/types.h> >> +#include <sys/stat.h> >> +#include <unistd.h> >> + >> +#include "conntrackd.h" >> +#include "log.h" >> + >> +#ifndef CLONE_NEWNET >> +#define CLONE_NEWNET 0x40000000 >> +#endif >> + >> +#ifndef __NR_setns >> +#ifdef __x86_64 >> +#define __NR_setns 308 >> +#endif >> +#ifdef __i386 >> +#define __NR_setns 346 >> +#endif >> +#endif > > I can see there's some setns wrapper since glibc 2.14 that will make > this portable. ok > > What I would try is to check in configure.ac if setns() exists, if so > use it. Otherwise, check for __NR_setns, if so define setns yourself > in this new namespace.c file. If neither setns() nor __NR_setns > exists, then define setns() to return success and print display error > and exist if NetlinkNamespace is used. ok > >> + >> +#ifdef __NR_setns >> + >> +static int root_fd = -1; > > This root_fd should go to STATE(ns).root_fd: Right, it makes more sense to put it inside ct_state for now. Once conntrackd will become multi-namespace aware, we will have to move it somewhere else, because all the namespaces(i.e. ct_states) will use the same root_fd. > > struct ct_state { > ... > struct { > int cur_fd; /* current namespace */ > int root_fd; /* root namespace */ > } ns; > >> + >> +void init_namespaces(void) { >> + root_fd = open("/proc/self/ns/net", O_RDONLY); >> + if (root_fd == -1) { >> + dlog(LOG_WARNING, "could not open current namespace"); >> + } >> +} >> + >> +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions, >> + int ns_fd) { >> + struct nfct_handle *handle = NULL; >> + >> + if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { >> + dlog(LOG_ERR, "could not switch between namespaces"); >> + } else { >> + handle = nfct_open(subsys_id, subscriptions); >> + if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { >> + dlog(LOG_ERR, "fatal: could not switch back to main namespace"); >> + } >> + } > > Oh, this code above is confusing, please make it more readable, like > this: > > if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { > dlog(LOG_ERR, "could not switch between namespaces"); > return NULL; > } > > handle = nfct_open(subsys_id, subscriptions); > if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { > dlog(LOG_ERR, "could not switch back to main namespace"); > } > > And you encapsulate part of that code in some new function like: > > int ctd_change_namespace(int fd) > { > if (fd != -1) > return 0; > > if (syscall(__NR_setns, fd, CLONE_NEWNET)) { > dlog(LOG_ERR, "could not switch back to main namespace"); > return -1; > } > > return 0; > } > > That you can reuse over and over again. ok > >> + return handle; >> +} >> + >> +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) { >> + struct mnl_socket *handle = NULL; >> + >> + if (ns_fd != -1 && syscall(__NR_setns, ns_fd, CLONE_NEWNET)) { >> + dlog(LOG_ERR, "could not switch between namespaces"); >> + } else { >> + handle = mnl_socket_open(bus); >> + if (ns_fd != -1 && syscall(__NR_setns, root_fd, CLONE_NEWNET)) { >> + dlog(LOG_ERR, "fatal: could not switch back to main namespace"); >> + } >> + } >> + return handle; > > Same thing for this function above. ok > >> +} >> + >> +#else >> + >> +void init_namespaces(void) { >> +} >> + >> +struct nfct_handle *nfct_open_ns(u_int8_t subsys_id, unsigned subscriptions, >> + int ns_fd) { >> + if (ns_fd == -1) { >> + return nfct_open(subsys_id, subscriptions); >> + } else { >> + dlog(LOG_ERR, "network namespaces are not supported on this system"); >> + return NULL; >> + } >> +} >> + >> +struct mnl_socket *mnl_socket_open_ns(int bus, int ns_fd) { >> + if (ns_fd == -1) { >> + return mnl_socket_open(bus); >> + } else { >> + dlog(LOG_ERR, "network namespaces are not supported on this system"); >> + return NULL; >> + } >> +} > > You can remove this conditional code above since the #else if you do > something like: > > int ctd_change_namespace(int fd) > { > #ifdef SETNS_EXISTS > if (fd != -1) > return 0; > > if (syscall(__NR_setns, fd, CLONE_NEWNET)) { > dlog(LOG_ERR, "could not switch back to main namespace"); > return -1; > } > #endif > return 0; > } > > That's all by now. Thanks for review! I will send updated patch soon. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html