On Sun, Jan 11, 2009 at 2:19 AM, Grzegorz Nosek <root@xxxxxxxxxxxxxx> wrote:
>
> So if I understand you right, your proposed solution would be something
> akin to ipt_cgroup (matching packets originating from a cgroup, like
> ipt_owner matches uid/gid) plus netfilter hooks for blocking/remapping
> addresses passed to connect() and/or bind()? Or maybe a dedicated
> netfilter table with per-cgroup chains?

Yes, something like one of those options. But it would never need to actually match real packets in the data path - just connect/bind/accept requests in the control path.

>
> Using the iptables API with connect() sending a fake packet, how would
> you represent "allow this connection, but bind() to 10.0.0.1 first"?
> Rewrite the source address in an iptables target?

Hmm, I hadn't considered that - I'd just been thinking of permit/deny decisions. But you're right, a rewrite rule might be a natural way to do this.

Clearly this feature would only use a small subset of the available iptables API, so in that sense it might be overkill. But avoiding inventing a complex new API seems worth the potential overkill.

I've attached the vague prototype that I was playing with a few months ago. It's missing some of the bits that it would need:

- it uses the NF_INET_LOCAL_OUT hook rather than a new NF_INET_CONTROL hook, because trying to edit/recompile the iptables userspace binary to handle a new hook proved to be too painful for this prototype. (i.e. it currently does use the fast path checks, but it really shouldn't ...)

- it currently only handles connect() - no bind() or accept()

- it doesn't have a cgroup-specific iptables filter yet - it just provides a system-wide control over connections. Adding a per-group filter would be pretty easy, I think.

As it stands, it's sufficient to express complex rules like "disallow connections to a remote sshd port, except on host H", etc.

Paul
--- include/linux/netfilter.h | 1 include/linux/netfilter_ipv4.h | 7 + include/net/netns/ipv4.h | 3 net/ipv4/netfilter/Kconfig | 7 + net/ipv4/netfilter/Makefile | 3 net/ipv4/netfilter/ip_tables.c | 7 + net/ipv4/netfilter/iptable_control.c | 167 +++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 6 + net/netfilter/xt_tcpudp.c | 16 +++ 9 files changed, 214 insertions(+), 3 deletions(-) Index: netfilter-2.6.25-rc3/include/linux/netfilter.h =================================================================== --- netfilter-2.6.25-rc3.orig/include/linux/netfilter.h +++ netfilter-2.6.25-rc3/include/linux/netfilter.h @@ -47,6 +47,7 @@ enum nf_inet_hooks { NF_INET_FORWARD, NF_INET_LOCAL_OUT, NF_INET_POST_ROUTING, +// NF_INET_CONTROL, NF_INET_NUMHOOKS }; Index: netfilter-2.6.25-rc3/include/net/netns/ipv4.h =================================================================== --- netfilter-2.6.25-rc3.orig/include/net/netns/ipv4.h +++ netfilter-2.6.25-rc3/include/net/netns/ipv4.h @@ -33,5 +33,8 @@ struct netns_ipv4 { struct xt_table *iptable_raw; struct xt_table *arptable_filter; #endif +#ifdef CONFIG_IP_NF_CONTROL + struct xt_table *iptable_control; +#endif }; #endif Index: netfilter-2.6.25-rc3/net/ipv4/netfilter/Kconfig =================================================================== --- netfilter-2.6.25-rc3.orig/net/ipv4/netfilter/Kconfig +++ netfilter-2.6.25-rc3/net/ipv4/netfilter/Kconfig @@ -281,6 +281,13 @@ config NF_NAT_SIP depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_SIP +config IP_NF_CONTROL + tristate "Connection control" + depends on IP_NF_IPTABLES + default m + help + This option adds a control hook/table + # mangle + specific targets config IP_NF_MANGLE tristate "Packet mangling" Index: netfilter-2.6.25-rc3/net/ipv4/netfilter/Makefile =================================================================== --- netfilter-2.6.25-rc3.orig/net/ipv4/netfilter/Makefile +++ netfilter-2.6.25-rc3/net/ipv4/netfilter/Makefile @@ -34,11 
+34,12 @@ obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o -# the three instances of ip_tables +# the five instances of ip_tables obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o obj-$(CONFIG_NF_NAT) += iptable_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o +obj-$(CONFIG_IP_NF_CONTROL) += iptable_control.o # matches obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o Index: netfilter-2.6.25-rc3/net/ipv4/netfilter/iptable_control.c =================================================================== --- /dev/null +++ netfilter-2.6.25-rc3/net/ipv4/netfilter/iptable_control.c @@ -0,0 +1,167 @@ +/* + * 'control' table, used for controlling operations such as bind() or connect() + * + * Copyright (C) 2007 Paul Menage <menage@xxxxxxxxxx> + * Cloned from code originally by Jozsef Kadlecsik <kadlec@xxxxxxxxxxxxxxxxx> + */ +#include <linux/module.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <net/ip.h> +#include <net/tcp.h> + +#define CONTROL_VALID_HOOKS (1 << NF_INET_LOCAL_OUT) + +static struct +{ + struct ipt_replace repl; + struct ipt_standard entries[1]; + struct ipt_error term; +} initial_table __net_initdata = { + .repl = { + .name = "control", + .valid_hooks = CONTROL_VALID_HOOKS, + .num_entries = 2, + .size = sizeof(struct ipt_standard) * 1 + sizeof(struct ipt_error), + .hook_entry = { + [NF_INET_LOCAL_OUT] = 0, + }, + .underflow = { + [NF_INET_LOCAL_OUT] = 0, + }, + }, + .entries = { + IPT_STANDARD_INIT(NF_ACCEPT), /* CONTROL */ + }, + .term = IPT_ERROR_INIT, /* ERROR */ +}; + +static struct xt_table packet_control = { + .name = "control", + .valid_hooks = CONTROL_VALID_HOOKS, + .lock = RW_LOCK_UNLOCKED, + .me = THIS_MODULE, + .af = AF_INET, +}; + +/* The work comes in here from netfilter.c. 
*/ +static unsigned int +ipt_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* We don't actually want to do anything in the real hook. (We + * should actually have a separate hook, but handling that + * from userspace is non-trivial. */ + return NF_ACCEPT; +} + +int ipt_control_check(int protocol, + struct inet_sock *sock, + struct sockaddr_in *remote) +{ + int err = 0; + struct sk_buff *skb = alloc_skb(MAX_TCP_HEADER, GFP_USER); + struct iphdr *iph; + struct tcphdr *th; + int verdict; + if (skb == NULL) { + return -ENOMEM; + } + + /* Allow the "owner" module to work */ + skb->sk = &sock->sk; + + /* Set up a fake TCP/UDP packet */ + iph = (struct iphdr *)skb_put(skb, sizeof(*iph)); + skb_reset_network_header(skb); + memset(iph, 0, sizeof(*iph)); + iph->version = 4; + iph->protocol = protocol; + iph->saddr = sock->rcv_saddr; + iph->daddr = remote->sin_addr.s_addr; + iph->ihl = sizeof(*iph) / 4; + iph->tot_len = sizeof(*iph) + sizeof(*th); + th = (struct tcphdr *)skb_put(skb, sizeof(*th)); + memset(th, 0, sizeof(*th)); + skb_set_transport_header(skb, sizeof(*iph)); + th->source = sock->num; + th->dest = remote->sin_port; + +#if 0 + printk(KERN_ERR "Calling ipt_do_table for %08x:%04x -> %08x:%04x. 
iph = %p, th = %p, data = %p, neth = %p, transh = %p\n", + sock->rcv_saddr, sock->num, remote->sin_addr.s_addr, remote->sin_port, iph, th, skb->data, skb->network_header, skb->transport_header); +#endif + verdict = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, NULL, init_net.ipv4.iptable_control); + + //printk(KERN_ERR "Verdict = %d\n", verdict); + if (verdict != NF_ACCEPT) { + err = -EPERM; + } + kfree_skb(skb); + return err; +} + +static struct nf_hook_ops control_ipt_ops[] __read_mostly = { + { + .hook = ipt_hook, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_OUT, + .owner = THIS_MODULE, + }, +}; + +static int __net_init iptable_control_net_init(struct net *net) +{ + /* Register table */ + net->ipv4.iptable_control = + ipt_register_table(net, &packet_control, &initial_table.repl); + if (IS_ERR(net->ipv4.iptable_control)) { + int errno = PTR_ERR(net->ipv4.iptable_control); + printk(KERN_ERR "Failed to register control table: %d\n", + errno); + return errno; + } + return 0; +} + +static void __net_exit iptable_control_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.iptable_control); +} + +static struct pernet_operations iptable_control_net_ops = { + .init = iptable_control_net_init, + .exit = iptable_control_net_exit, +}; + +static int __init iptable_control_init(void) +{ + int ret; + + ret = register_pernet_subsys(&iptable_control_net_ops); + if (ret < 0) + return ret; + + /* Register hooks */ + ret = nf_register_hooks(control_ipt_ops, ARRAY_SIZE(control_ipt_ops)); + if (ret < 0) + goto cleanup_table; + + return ret; + + cleanup_table: + unregister_pernet_subsys(&iptable_control_net_ops); + return ret; +} + +static void __exit iptable_control_fini(void) +{ + nf_unregister_hooks(control_ipt_ops, ARRAY_SIZE(control_ipt_ops)); + unregister_pernet_subsys(&iptable_control_net_ops); +} + +module_init(iptable_control_init); +module_exit(iptable_control_fini); +MODULE_LICENSE("GPL"); Index: netfilter-2.6.25-rc3/include/linux/netfilter_ipv4.h 
=================================================================== --- netfilter-2.6.25-rc3.orig/include/linux/netfilter_ipv4.h +++ netfilter-2.6.25-rc3/include/linux/netfilter_ipv4.h @@ -48,6 +48,7 @@ #define NF_IP_LOCAL_OUT 3 /* Packets about to hit the wire. */ #define NF_IP_POST_ROUTING 4 +//#define NF_IP_CONTROL 5 #define NF_IP_NUMHOOKS 5 #endif /* ! __KERNEL__ */ @@ -79,6 +80,12 @@ extern int ip_route_me_harder(struct sk_ extern int ip_xfrm_me_harder(struct sk_buff *skb); extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); + +struct inet_sock; +int ipt_control_check(int protocol, + struct inet_sock *sock, + struct sockaddr_in *remote); + #endif /*__KERNEL__*/ #endif /*__LINUX_IP_NETFILTER_H*/ Index: netfilter-2.6.25-rc3/net/ipv4/netfilter/ip_tables.c =================================================================== --- netfilter-2.6.25-rc3.orig/net/ipv4/netfilter/ip_tables.c +++ netfilter-2.6.25-rc3/net/ipv4/netfilter/ip_tables.c @@ -32,9 +32,9 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@xxxxxxxxxxxxx>"); MODULE_DESCRIPTION("IPv4 packet filter"); -/*#define DEBUG_IP_FIREWALL*/ +#define DEBUG_IP_FIREWALL /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ -/*#define DEBUG_IP_FIREWALL_USER*/ +#define DEBUG_IP_FIREWALL_USER #ifdef DEBUG_IP_FIREWALL #define dprintf(format, args...) 
printk(format , ## args) @@ -231,6 +231,9 @@ static const char *const hooknames[] = { [NF_INET_FORWARD] = "FORWARD", [NF_INET_LOCAL_OUT] = "OUTPUT", [NF_INET_POST_ROUTING] = "POSTROUTING", +#ifdef CONFIG_IP_NF_CONTROL + [NF_INET_CONTROL] = "CONTROL", +#endif }; enum nf_ip_trace_comments { Index: netfilter-2.6.25-rc3/net/ipv4/tcp_ipv4.c =================================================================== --- netfilter-2.6.25-rc3.orig/net/ipv4/tcp_ipv4.c +++ netfilter-2.6.25-rc3/net/ipv4/tcp_ipv4.c @@ -82,6 +82,8 @@ #include <linux/crypto.h> #include <linux/scatterlist.h> +#include <linux/netfilter_ipv4.h> + int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; @@ -166,6 +168,10 @@ int tcp_v4_connect(struct sock *sk, stru if (usin->sin_family != AF_INET) return -EAFNOSUPPORT; + if ((err = ipt_control_check(IPPROTO_TCP, inet, usin))) { + return err; + } + nexthop = daddr = usin->sin_addr.s_addr; if (inet->opt && inet->opt->srr) { if (!daddr) Index: netfilter-2.6.25-rc3/net/netfilter/xt_tcpudp.c =================================================================== --- netfilter-2.6.25-rc3.orig/net/netfilter/xt_tcpudp.c +++ netfilter-2.6.25-rc3/net/netfilter/xt_tcpudp.c @@ -19,6 +19,7 @@ MODULE_ALIAS("ipt_tcp"); MODULE_ALIAS("ip6t_udp"); MODULE_ALIAS("ip6t_tcp"); +#define DEBUG_IP_FIREWALL_USER #ifdef DEBUG_IP_FIREWALL_USER #define duprintf(format, args...) printk(format , ## args) #else @@ -75,6 +76,8 @@ tcp_mt(const struct sk_buff *skb, const struct tcphdr _tcph, *th; const struct xt_tcp *tcpinfo = matchinfo; + printk(KERN_ERR "In tcp_mt\n"); + if (offset) { /* To quote Alan: @@ -93,6 +96,8 @@ tcp_mt(const struct sk_buff *skb, const #define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg))) th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); + + printk(KERN_ERR "th=%p\n", th); if (th == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. 
*/ @@ -101,18 +106,29 @@ tcp_mt(const struct sk_buff *skb, const return false; } + duprintf("Checking source ports\n"); + if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1], ntohs(th->source), !!(tcpinfo->invflags & XT_TCP_INV_SRCPT))) return false; + + duprintf("Checking dest ports %d - %d vs %d\n", tcpinfo->dpts[0], tcpinfo->dpts[1], ntohs(th->dest)); + if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1], ntohs(th->dest), !!(tcpinfo->invflags & XT_TCP_INV_DSTPT))) return false; + + duprintf("Checking flags\n"); + if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask) == tcpinfo->flg_cmp, XT_TCP_INV_FLAGS)) return false; + + duprintf("Checking options\n"); + if (tcpinfo->option) { if (th->doff * 4 < sizeof(_tcph)) { *hotdrop = true;
_______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers