Hey all, is this patch going to be merged into the kernel tree? It works fine. Yesterday I was under attack, and after applying this patch my problem was solved. It hasn't dropped any real connections. In the past I have sometimes switched to FreeBSD because of the synproxy state in pf. Thanks, On Fri, Jul 2, 2010 at 1:19 AM, Changli Gao <xiaosuo@xxxxxxxxx> wrote: > > v3: > fix the bug it can't work with bridge. > > netfilter: xtables target SYNPROXY. > > This patch implements an xtables target SYNPROXY. As the connection to the > TCP server won't be established until the ACK from the client is received, it > can protect the TCP server from the SYN-flood attacks. > > It works in the raw table of the PREROUTING chain, before conntracking system. > Syncookies is used, so no new state is introduced into the conntracking system. > In fact, until the first connection is established, conntracking system doesn't > see any packets. So when there is a SYN-flood attack, conntracking system won't > be busy on finding and deleting the un-assured ct. > > As the SYN-packet of the second connection request is sent locally, the DNAT > rules which are in the PREROUTING chain should be moved to the OUTPUT chain. 
> > Signed-off-by: Changli Gao <xiaosuo@xxxxxxxxx> > ---- > include/net/netfilter/nf_conntrack.h | 10 > include/net/netfilter/nf_conntrack_core.h | 21 > include/net/netfilter/nf_conntrack_extend.h | 2 > include/net/tcp.h | 7 > net/ipv4/syncookies.c | 22 > net/ipv4/tcp_ipv4.c | 9 > net/netfilter/Kconfig | 17 > net/netfilter/Makefile | 1 > net/netfilter/nf_conntrack_core.c | 45 + > net/netfilter/xt_SYNPROXY.c | 679 ++++++++++++++++++++++++++++ > 10 files changed, 794 insertions(+), 19 deletions(-) > diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h > index e624dae..5e6d8e4 100644 > --- a/include/net/netfilter/nf_conntrack.h > +++ b/include/net/netfilter/nf_conntrack.h > @@ -311,5 +311,15 @@ do { \ > #define MODULE_ALIAS_NFCT_HELPER(helper) \ > MODULE_ALIAS("nfct-helper-" helper) > > +#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \ > + defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE) > +extern unsigned int (*syn_proxy_pre_hook)(struct sk_buff *skb, > + struct nf_conn *ct, > + enum ip_conntrack_info ctinfo); > + > +extern unsigned int (*syn_proxy_post_hook)(struct sk_buff *skb, > + struct nf_conn *ct, > + enum ip_conntrack_info ctinfo); > +#endif > #endif /* __KERNEL__ */ > #endif /* _NF_CONNTRACK_H */ > diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h > index aced085..637b404 100644 > --- a/include/net/netfilter/nf_conntrack_core.h > +++ b/include/net/netfilter/nf_conntrack_core.h > @@ -54,6 +54,23 @@ nf_conntrack_find_get(struct net *net, u16 zone, > > extern int __nf_conntrack_confirm(struct sk_buff *skb); > > +static inline unsigned int syn_proxy_post_call(struct sk_buff *skb, > + struct nf_conn *ct, > + enum ip_conntrack_info ctinfo) > +{ > + unsigned int ret = NF_ACCEPT; > +#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \ > + defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE) > + unsigned int (*syn_proxy)(struct sk_buff *, struct nf_conn *, > + enum 
ip_conntrack_info); > + syn_proxy = rcu_dereference(syn_proxy_post_hook); > + if (syn_proxy) > + ret = syn_proxy(skb, ct, ctinfo); > +#endif > + > + return ret; > +} > + > /* Confirm a connection: returns NF_DROP if packet must be dropped. */ > static inline int nf_conntrack_confirm(struct sk_buff *skb) > { > @@ -63,8 +80,10 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) > if (ct && !nf_ct_is_untracked(ct)) { > if (!nf_ct_is_confirmed(ct)) > ret = __nf_conntrack_confirm(skb); > - if (likely(ret == NF_ACCEPT)) > + if (likely(ret == NF_ACCEPT)) { > nf_ct_deliver_cached_events(ct); > + ret = syn_proxy_post_call(skb, ct, skb->nfctinfo); > + } > } > return ret; > } > diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h > index 32d15bd..b2ae7e9 100644 > --- a/include/net/netfilter/nf_conntrack_extend.h > +++ b/include/net/netfilter/nf_conntrack_extend.h > @@ -11,6 +11,7 @@ enum nf_ct_ext_id { > NF_CT_EXT_ACCT, > NF_CT_EXT_ECACHE, > NF_CT_EXT_ZONE, > + NF_CT_EXT_SYNPROXY, > NF_CT_EXT_NUM, > }; > > @@ -19,6 +20,7 @@ enum nf_ct_ext_id { > #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter > #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache > #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone > +#define NF_CT_EXT_SYNPROXY_TYPE struct syn_proxy_state > > /* Extensions: optional stuff which isn't permanently in struct. 
*/ > struct nf_ct_ext { > diff --git a/include/net/tcp.h b/include/net/tcp.h > index c2f96c2..06f28d3 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -460,8 +460,11 @@ extern int tcp_disconnect(struct sock *sk, int flags); > extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; > extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, > struct ip_options *opt); > -extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, > - __u16 *mss); > +extern __u32 __cookie_v4_init_sequence(__be32 saddr, __be32 daddr, > + __be16 sport, __be16 dport, __u32 seq, > + __u16 *mssp); > +extern int cookie_v4_check_sequence(const struct iphdr *iph, > + const struct tcphdr *th, __u32 cookie); > > extern __u32 cookie_init_timestamp(struct request_sock *req); > extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *); > diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c > index 650cace..3adcba3 100644 > --- a/net/ipv4/syncookies.c > +++ b/net/ipv4/syncookies.c > @@ -159,26 +159,21 @@ static __u16 const msstab[] = { > * Generate a syncookie. mssp points to the mss, which is returned > * rounded down to the value encoded in the cookie. 
> */ > -__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) > +__u32 __cookie_v4_init_sequence(__be32 saddr, __be32 daddr, __be16 sport, > + __be16 dport, __u32 seq, __u16 *mssp) > { > - const struct iphdr *iph = ip_hdr(skb); > - const struct tcphdr *th = tcp_hdr(skb); > int mssind; > const __u16 mss = *mssp; > > - tcp_synq_overflow(sk); > - > for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) > if (mss >= msstab[mssind]) > break; > *mssp = msstab[mssind]; > > - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); > - > - return secure_tcp_syn_cookie(iph->saddr, iph->daddr, > - th->source, th->dest, ntohl(th->seq), > + return secure_tcp_syn_cookie(saddr, daddr, sport, dport, seq, > jiffies / (HZ * 60), mssind); > } > +EXPORT_SYMBOL(__cookie_v4_init_sequence); > > /* > * This (misnamed) value is the age of syncookie which is permitted. > @@ -191,10 +186,9 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) > * Check if a ack sequence number is a valid syncookie. > * Return the decoded mss if it is, or 0 if not. > */ > -static inline int cookie_check(struct sk_buff *skb, __u32 cookie) > +int cookie_v4_check_sequence(const struct iphdr *iph, const struct tcphdr *th, > + __u32 cookie) > { > - const struct iphdr *iph = ip_hdr(skb); > - const struct tcphdr *th = tcp_hdr(skb); > __u32 seq = ntohl(th->seq) - 1; > __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr, > th->source, th->dest, seq, > @@ -203,6 +197,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) > > return mssind < ARRAY_SIZE(msstab) ? 
msstab[mssind] : 0; > } > +EXPORT_SYMBOL(cookie_v4_check_sequence); > > static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, > struct request_sock *req, > @@ -282,7 +277,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, > goto out; > > if (tcp_synq_no_recent_overflow(sk) || > - (mss = cookie_check(skb, cookie)) == 0) { > + (mss = cookie_v4_check_sequence(ip_hdr(skb), tcp_hdr(skb), > + cookie)) == 0) { > NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); > goto out; > } > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c > index 8fa32f5..3b094c7 100644 > --- a/net/ipv4/tcp_ipv4.c > +++ b/net/ipv4/tcp_ipv4.c > @@ -1332,7 +1332,14 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) > TCP_ECN_create_request(req, tcp_hdr(skb)); > > if (want_cookie) { > - isn = cookie_v4_init_sequence(sk, skb, &req->mss); > + struct tcphdr *th; > + > + tcp_synq_overflow(sk); > + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); > + th = tcp_hdr(skb); > + isn = __cookie_v4_init_sequence(saddr, daddr, th->source, > + th->dest, ntohl(th->seq), > + &req->mss); > req->cookie_ts = tmp_opt.tstamp_ok; > } else if (!isn) { > struct inet_peer *peer = NULL; > diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig > index 413ed24..fd8ad8c 100644 > --- a/net/netfilter/Kconfig > +++ b/net/netfilter/Kconfig > @@ -560,6 +560,23 @@ config NETFILTER_XT_TARGET_SECMARK > > To compile it as a module, choose M here. If unsure, say N. > > +config NETFILTER_XT_TARGET_SYNPROXY > + tristate '"SYNPROXY" target support (EXPERIMENTAL)' > + depends on EXPERIMENTAL > + depends on SYN_COOKIES > + depends on IP_NF_RAW > + depends on NF_CONNTRACK > + depends on NETFILTER_ADVANCED > + help > + The SYNPROXY target allows a raw rule to specify that some TCP > + connections are relayed to protect the TCP servers from the SYN-flood > + DoS attacks. 
Syn cookies is used to save the initial state, so no > + conntrack is needed until the client side connection is established. > + It frees the connection tracking system from creating/deleting > + conntracks when SYN-flood DoS attack acts. > + > + To compile it as a module, choose M here. If unsure, say N. > + > config NETFILTER_XT_TARGET_TCPMSS > tristate '"TCPMSS" target support' > depends on (IPV6 || IPV6=n) > diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile > index e28420a..4e32834 100644 > --- a/net/netfilter/Makefile > +++ b/net/netfilter/Makefile > @@ -62,6 +62,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o > obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o > obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o > obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o > +obj-$(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) += xt_SYNPROXY.o > > # matches > obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o > diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c > index 16b41b4..dd85d6f 100644 > --- a/net/netfilter/nf_conntrack_core.c > +++ b/net/netfilter/nf_conntrack_core.c > @@ -800,6 +800,26 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, > return ct; > } > > +static inline unsigned int syn_proxy_pre_call(int protonum, struct sk_buff *skb, > + struct nf_conn *ct, > + enum ip_conntrack_info ctinfo) > +{ > + unsigned int ret = NF_ACCEPT; > +#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \ > + defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE) > + unsigned int (*syn_proxy)(struct sk_buff *, struct nf_conn *, > + enum ip_conntrack_info); > + > + if (protonum == IPPROTO_TCP) { > + syn_proxy = rcu_dereference(syn_proxy_pre_hook); > + if (syn_proxy) > + ret = syn_proxy(skb, ct, ctinfo); > + } > +#endif > + > + return ret; > +} > + > unsigned int > nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, > struct sk_buff *skb) > @@ -855,8 +875,9 @@ 
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, > l3proto, l4proto, &set_reply, &ctinfo); > if (!ct) { > /* Not valid part of a connection */ > - NF_CT_STAT_INC_ATOMIC(net, invalid); > - ret = NF_ACCEPT; > + ret = syn_proxy_pre_call(protonum, skb, NULL, ctinfo); > + if (ret == NF_ACCEPT) > + NF_CT_STAT_INC_ATOMIC(net, invalid); > goto out; > } > > @@ -869,6 +890,9 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, > > NF_CT_ASSERT(skb->nfct); > > + ret = syn_proxy_pre_call(protonum, skb, ct, ctinfo); > + if (ret != NF_ACCEPT) > + goto out; > ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum); > if (ret <= 0) { > /* Invalid: inverse of the return code tells > @@ -1476,6 +1500,17 @@ s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, > u32 seq); > EXPORT_SYMBOL_GPL(nf_ct_nat_offset); > > +#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \ > + defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE) > +unsigned int (*syn_proxy_pre_hook)(struct sk_buff *skb, struct nf_conn *ct, > + enum ip_conntrack_info ctinfo); > +EXPORT_SYMBOL(syn_proxy_pre_hook); > + > +unsigned int (*syn_proxy_post_hook)(struct sk_buff *skb, struct nf_conn *ct, > + enum ip_conntrack_info ctinfo); > +EXPORT_SYMBOL(syn_proxy_post_hook); > +#endif > + > int nf_conntrack_init(struct net *net) > { > int ret; > @@ -1496,6 +1531,12 @@ int nf_conntrack_init(struct net *net) > > /* Howto get NAT offsets */ > rcu_assign_pointer(nf_ct_nat_offset, NULL); > + > +#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \ > + defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE) > + rcu_assign_pointer(syn_proxy_pre_hook, NULL); > + rcu_assign_pointer(syn_proxy_post_hook, NULL); > +#endif > } > return 0; > > diff --git a/net/netfilter/xt_SYNPROXY.c b/net/netfilter/xt_SYNPROXY.c > new file mode 100644 > index 0000000..1a55f33 > --- /dev/null > +++ b/net/netfilter/xt_SYNPROXY.c > @@ -0,0 +1,679 @@ > +/* (C) 2010- Changli Gao <xiaosuo@xxxxxxxxx> > + * > + * This program is free 
software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + * > + * It bases on ipt_REJECT.c > + */ > +#define pr_fmt(fmt) "SYNPROXY: " fmt > +#include <linux/module.h> > +#include <linux/skbuff.h> > +#include <linux/slab.h> > +#include <linux/ip.h> > +#include <linux/udp.h> > +#include <linux/icmp.h> > +#include <linux/unaligned/access_ok.h> > +#include <net/icmp.h> > +#include <net/ip.h> > +#include <net/tcp.h> > +#include <net/route.h> > +#include <net/dst.h> > +#include <net/netfilter/nf_conntrack.h> > +#include <net/netfilter/nf_conntrack_extend.h> > +#include <linux/netfilter/x_tables.h> > +#include <linux/netfilter_ipv4/ip_tables.h> > + > +MODULE_LICENSE("GPL"); > +MODULE_AUTHOR("Changli Gao <xiaosuo@xxxxxxxxx>"); > +MODULE_DESCRIPTION("Xtables: \"SYNPROXY\" target for IPv4"); > +MODULE_ALIAS("ipt_SYNPROXY"); > + > +enum { > + TCP_SEND_FLAG_NOTRACE = 0x1, > + TCP_SEND_FLAG_SYNCOOKIE = 0x2, > + TCP_SEND_FLAG_ACK2SYN = 0x4, > +}; > + > +struct syn_proxy_state { > + u16 seq_inited; > + __be16 window; > + u32 seq_diff; > +}; > + > +static int get_mtu(const struct dst_entry *dst) > +{ > + int mtu; > + > + mtu = dst_mtu(dst); > + if (mtu) > + return mtu; > + > + return dst->dev ? 
dst->dev->mtu : 0; > +} > + > +static int get_advmss(const struct dst_entry *dst) > +{ > + int advmss; > + > + advmss = dst_metric(dst, RTAX_ADVMSS); > + if (advmss) > + return advmss; > + advmss = get_mtu(dst); > + if (advmss) > + return advmss - (sizeof(struct iphdr) + sizeof(struct tcphdr)); > + > + return TCP_MSS_DEFAULT; > +} > + > +static int syn_proxy_route(struct sk_buff *skb, struct net *net, u16 *pmss) > +{ > + const struct iphdr *iph = ip_hdr(skb); > + struct rtable *rt; > + struct flowi fl = {}; > + unsigned int type; > + int flags = 0; > + int err; > + u16 mss; > + > + type = inet_addr_type(net, iph->saddr); > + if (type != RTN_LOCAL) { > + type = inet_addr_type(net, iph->daddr); > + if (type == RTN_LOCAL) > + flags |= FLOWI_FLAG_ANYSRC; > + } > + > + if (type == RTN_LOCAL) { > + fl.nl_u.ip4_u.daddr = iph->daddr; > + fl.nl_u.ip4_u.saddr = iph->saddr; > + fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); > + fl.flags = flags; > + err = ip_route_output_key(net, &rt, &fl); > + if (err) > + goto out; > + > + skb_dst_set(skb, &rt->dst); > + } else { > + /* non-local src, find valid iif to satisfy > + * rp-filter when calling ip_route_input. 
*/ > + fl.nl_u.ip4_u.daddr = iph->saddr; > + err = ip_route_output_key(net, &rt, &fl); > + if (err) > + goto out; > + > + err = ip_route_input(skb, iph->daddr, iph->saddr, > + RT_TOS(iph->tos), rt->dst.dev); > + if (err) { > + dst_release(&rt->dst); > + goto out; > + } > + if (pmss) { > + mss = get_advmss(&rt->dst); > + if (*pmss > mss) > + *pmss = mss; > + } > + dst_release(&rt->dst); > + } > + > + err = skb_dst(skb)->error; > + if (!err && pmss) { > + mss = get_advmss(skb_dst(skb)); > + if (*pmss > mss) > + *pmss = mss; > + } > + > +out: > + return err; > +} > + > +static int tcp_send(__be32 src, __be32 dst, __be16 sport, __be16 dport, > + u32 seq, u32 ack_seq, __be16 window, u16 mss, u8 tcp_flags, > + u8 tos, struct net_device *dev, int flags, > + struct sk_buff *oskb) > +{ > + struct sk_buff *skb; > + struct iphdr *iph; > + struct tcphdr *th; > + int err, len; > + > + len = sizeof(*th); > + if (mss) > + len += TCPOLEN_MSS; > + > + skb = NULL; > + /* caller must give me a large enough oskb */ > + if (oskb) { > + unsigned char *odata = oskb->data; > + > + if (skb_recycle_check(oskb, 0)) { > + oskb->data = odata; > + skb_reset_tail_pointer(oskb); > + skb = oskb; > + pr_debug("recycle skb\n"); > + } > + } > + if (!skb) { > + skb = alloc_skb(LL_MAX_HEADER + sizeof(*iph) + len, GFP_ATOMIC); > + if (!skb) { > + err = -ENOMEM; > + goto out; > + } > + skb_reserve(skb, LL_MAX_HEADER); > + } > + > + skb_reset_network_header(skb); > + if (!(flags & TCP_SEND_FLAG_ACK2SYN) || skb != oskb) { > + iph = (struct iphdr *)skb_put(skb, sizeof(*iph)); > + iph->version = 4; > + iph->ihl = sizeof(*iph) / 4; > + iph->tos = tos; > + /* tot_len is set in ip_local_out() */ > + iph->id = 0; > + iph->frag_off = htons(IP_DF); > + iph->protocol = IPPROTO_TCP; > + iph->saddr = src; > + iph->daddr = dst; > + th = (struct tcphdr *)skb_put(skb, len); > + th->source = sport; > + th->dest = dport; > + } else { > + iph = (struct iphdr *)skb->data; > + iph->id = 0; > + iph->frag_off = htons(IP_DF); > 
+ skb_put(skb, iph->ihl * 4 + len); > + th = (struct tcphdr *)(skb->data + iph->ihl * 4); > + } > + > + th->seq = htonl(seq); > + th->ack_seq = htonl(ack_seq); > + tcp_flag_byte(th) = tcp_flags; > + th->doff = len / 4; > + th->window = window; > + th->urg_ptr = 0; > + > + skb->protocol = htons(ETH_P_IP); > + if ((flags & TCP_SEND_FLAG_SYNCOOKIE) && mss) > + err = syn_proxy_route(skb, dev_net(dev), &mss); > + else > + err = syn_proxy_route(skb, dev_net(dev), NULL); > + if (err) > + goto err_out; > + > + if ((flags & TCP_SEND_FLAG_SYNCOOKIE)) { > + if (mss) { > + th->seq = htonl(__cookie_v4_init_sequence(dst, src, > + dport, sport, > + ack_seq - 1, > + &mss)); > + } else { > + mss = TCP_MSS_DEFAULT; > + th->seq = htonl(__cookie_v4_init_sequence(dst, src, > + dport, sport, > + ack_seq - 1, > + &mss)); > + mss = 0; > + } > + } > + > + if (mss) > + * (__force __be32 *)(th + 1) = htonl((TCPOPT_MSS << 24) | > + (TCPOLEN_MSS << 16) | > + mss); > + skb->ip_summed = CHECKSUM_PARTIAL; > + th->check = ~tcp_v4_check(len, src, dst, 0); > + skb->csum_start = (unsigned char *)th - skb->head; > + skb->csum_offset = offsetof(struct tcphdr, check); > + > + if (!(flags & TCP_SEND_FLAG_ACK2SYN) || skb != oskb) > + iph->ttl = dst_metric(skb_dst(skb), RTAX_HOPLIMIT); > + > + if (skb->len > get_mtu(skb_dst(skb))) { > + if (printk_ratelimit()) > + pr_warning("%s has smaller mtu: %d\n", > + skb_dst(skb)->dev->name, > + get_mtu(skb_dst(skb))); > + err = -EINVAL; > + goto err_out; > + } > + > + if ((flags & TCP_SEND_FLAG_NOTRACE)) { > + skb->nfct = &nf_ct_untracked_get()->ct_general; > + skb->nfctinfo = IP_CT_NEW; > + nf_conntrack_get(skb->nfct); > + } > + > + pr_debug("ip_local_out: %pI4n:%hu -> %pI4n:%hu (seq=%u, " > + "ack_seq=%u mss=%hu flags=%hhx)\n", &src, ntohs(th->source), > + &dst, ntohs(th->dest), ntohl(th->seq), ack_seq, mss, > + tcp_flags); > + > + err = ip_local_out(skb); > + if (err > 0) > + err = net_xmit_errno(err); > + > + pr_debug("ip_local_out: return with %d\n", err); > 
+out: > + if (oskb && oskb != skb) > + kfree_skb(oskb); > + > + return err; > + > +err_out: > + kfree_skb(skb); > + goto out; > +} > + > +static int get_mss(u8 *data, int len) > +{ > + u8 olen; > + > + while (len >= TCPOLEN_MSS) { > + switch (data[0]) { > + case TCPOPT_EOL: > + return 0; > + case TCPOPT_NOP: > + data++; > + len--; > + break; > + case TCPOPT_MSS: > + if (data[1] != TCPOLEN_MSS) > + return -EINVAL; > + return get_unaligned_be16(data + 2); > + default: > + olen = data[1]; > + if (olen < 2 || olen > len) > + return -EINVAL; > + data += olen; > + len -= olen; > + break; > + } > + } > + > + return 0; > +} > + > +static DEFINE_PER_CPU(struct syn_proxy_state, syn_proxy_state); > + > +/* syn_proxy_pre isn't under the protection of nf_conntrack_proto_tcp.c */ > +static unsigned int syn_proxy_pre(struct sk_buff *skb, struct nf_conn *ct, > + enum ip_conntrack_info ctinfo) > +{ > + struct syn_proxy_state *state; > + struct iphdr *iph; > + struct tcphdr *th, _th; > + > + /* only support IPv4 now */ > + iph = ip_hdr(skb); > + if (iph->version != 4) > + return NF_ACCEPT; > + > + th = skb_header_pointer(skb, iph->ihl * 4, sizeof(_th), &_th); > + if (th == NULL) > + return NF_DROP; > + > + if (!ct || !nf_ct_is_confirmed(ct)) { > + int ret; > + > + if (!th->syn && th->ack) { > + u16 mss; > + struct sk_buff *rec_skb; > + > + mss = cookie_v4_check_sequence(iph, th, > + ntohl(th->ack_seq) - 1); > + if (!mss) > + return NF_ACCEPT; > + > + pr_debug("%pI4n:%hu -> %pI4n:%hu(mss=%hu)\n", > + &iph->saddr, ntohs(th->source), > + &iph->daddr, ntohs(th->dest), mss); > + > + if (skb_tailroom(skb) < TCPOLEN_MSS && > + skb->len < iph->ihl * 4 + sizeof(*th) + TCPOLEN_MSS) > + rec_skb = NULL; > + else > + rec_skb = skb; > + > + local_bh_disable(); > + state = &__get_cpu_var(syn_proxy_state); > + state->seq_inited = 1; > + state->window = th->window; > + state->seq_diff = ntohl(th->ack_seq) - 1; > + if (rec_skb) > + tcp_send(iph->saddr, iph->daddr, 0, 0, > + ntohl(th->seq) - 1, 0, 
th->window, > + mss, TCPHDR_SYN, 0, skb->dev, > + TCP_SEND_FLAG_ACK2SYN, rec_skb); > + else > + tcp_send(iph->saddr, iph->daddr, th->source, > + th->dest, ntohl(th->seq) - 1, 0, > + th->window, mss, TCPHDR_SYN, > + iph->tos, skb->dev, 0, NULL); > + state->seq_inited = 0; > + local_bh_enable(); > + > + if (!rec_skb) > + kfree_skb(skb); > + > + return NF_STOLEN; > + } > + > + if (!ct || !th->syn || th->ack) > + return NF_ACCEPT; > + > + ret = NF_ACCEPT; > + local_bh_disable(); > + state = &__get_cpu_var(syn_proxy_state); > + if (state->seq_inited) { > + struct syn_proxy_state *nstate; > + > + nstate = nf_ct_ext_add(ct, NF_CT_EXT_SYNPROXY, > + GFP_ATOMIC); > + if (nstate != NULL) { > + nstate->seq_inited = 0; > + nstate->window = state->window; > + nstate->seq_diff = state->seq_diff; > + pr_debug("seq_diff: %u\n", nstate->seq_diff); > + } else { > + ret = NF_DROP; > + } > + } > + local_bh_enable(); > + > + return ret; > + } > + > + state = nf_ct_ext_find(ct, NF_CT_EXT_SYNPROXY); > + if (!state) > + return NF_ACCEPT; > + > + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { > + __be32 newack; > + > + /* don't need to mangle duplicate SYN packets */ > + if (th->syn && !th->ack) > + return NF_ACCEPT; > + if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*th))) > + return NF_DROP; > + th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); > + newack = htonl(ntohl(th->ack_seq) - state->seq_diff); > + inet_proto_csum_replace4(&th->check, skb, th->ack_seq, newack, > + 0); > + pr_debug("alter ack seq: %u -> %u\n", > + ntohl(th->ack_seq), ntohl(newack)); > + th->ack_seq = newack; > + } else { > + /* Simultaneous open ? Oh, no. The connection between > + * client and us is established. 
*/ > + if (th->syn && !th->ack) > + return NF_DROP; > + } > + > + return NF_ACCEPT; > +} > + > +static unsigned int syn_proxy_mangle_pkt(struct sk_buff *skb, struct iphdr *iph, > + struct tcphdr *th, u32 seq_diff) > +{ > + __be32 new; > + int olen; > + > + if (skb->len < (iph->ihl + th->doff) * 4) > + return NF_DROP; > + if (!skb_make_writable(skb, (iph->ihl + th->doff) * 4)) > + return NF_DROP; > + iph = (struct iphdr *)(skb->data); > + th = (struct tcphdr *)(skb->data + iph->ihl * 4); > + > + new = tcp_flag_word(th) & (~TCP_FLAG_SYN); > + inet_proto_csum_replace4(&th->check, skb, tcp_flag_word(th), new, 0); > + tcp_flag_word(th) = new; > + > + new = htonl(ntohl(th->seq) + seq_diff); > + inet_proto_csum_replace4(&th->check, skb, th->seq, new, 0); > + pr_debug("alter seq: %u -> %u\n", ntohl(th->seq), ntohl(new)); > + th->seq = new; > + > + olen = th->doff - sizeof(*th) / 4; > + if (olen) { > + __be32 *opt; > + > + opt = (__force __be32 *)(th + 1); > +#define TCPOPT_EOL_WORD ((TCPOPT_EOL << 24) + (TCPOPT_EOL << 16) + \ > + (TCPOPT_EOL << 8) + TCPOPT_EOL) > + inet_proto_csum_replace4(&th->check, skb, *opt, TCPOPT_EOL_WORD, > + 0); > + *opt = TCPOPT_EOL_WORD; > + } > + > + return NF_ACCEPT; > +} > + > +static unsigned int syn_proxy_post(struct sk_buff *skb, struct nf_conn *ct, > + enum ip_conntrack_info ctinfo) > +{ > + struct syn_proxy_state *state; > + struct iphdr *iph; > + struct tcphdr *th; > + > + /* untraced packets don't have NF_CT_EXT_SYNPROXY ext, as they don't > + * enter syn_proxy_pre() */ > + state = nf_ct_ext_find(ct, NF_CT_EXT_SYNPROXY); > + if (state == NULL) > + return NF_ACCEPT; > + > + iph = ip_hdr(skb); > + if (!skb_make_writable(skb, iph->ihl * 4 + sizeof(*th))) > + return NF_DROP; > + th = (struct tcphdr *)(skb->data + iph->ihl * 4); > + if (!state->seq_inited) { > + if (th->syn) { > + /* It must be from original direction, as the ones > + * from the other side are dropped in function > + * syn_proxy_pre() */ > + if (!th->ack) > + return 
NF_ACCEPT; > + > + pr_debug("SYN-ACK %pI4n:%hu -> %pI4n:%hu " > + "(seq=%u ack_seq=%u)\n", > + &iph->saddr, ntohs(th->source), &iph->daddr, > + ntohs(th->dest), ntohl(th->seq), > + ntohl(th->ack_seq)); > + > + /* SYN-ACK from reply direction with the protection > + * of conntrack */ > + spin_lock_bh(&ct->lock); > + if (!state->seq_inited) { > + state->seq_inited = 1; > + pr_debug("update seq_diff %u -> %u\n", > + state->seq_diff, > + state->seq_diff - ntohl(th->seq)); > + state->seq_diff -= ntohl(th->seq); > + } > + spin_unlock_bh(&ct->lock); > + tcp_send(iph->daddr, iph->saddr, th->dest, th->source, > + ntohl(th->ack_seq), > + ntohl(th->seq) + 1 + state->seq_diff, > + state->window, 0, TCPHDR_ACK, iph->tos, > + skb->dev, 0, NULL); > + > + return syn_proxy_mangle_pkt(skb, iph, th, > + state->seq_diff + 1); > + } else { > + __be32 newseq; > + > + if (!th->rst) > + return NF_ACCEPT; > + newseq = htonl(state->seq_diff + 1); > + inet_proto_csum_replace4(&th->check, skb, th->seq, > + newseq, 0); > + pr_debug("alter RST seq: %u -> %u\n", > + ntohl(th->seq), ntohl(newseq)); > + th->seq = newseq; > + > + return NF_ACCEPT; > + } > + } > + > + /* ct should be in ESTABLISHED state, but if the ack packets from > + * us are lost. 
*/ > + if (th->syn) { > + if (!th->ack) > + return NF_ACCEPT; > + > + tcp_send(iph->daddr, iph->saddr, th->dest, th->source, > + ntohl(th->ack_seq), > + ntohl(th->seq) + 1 + state->seq_diff, > + state->window, 0, TCPHDR_ACK, iph->tos, > + skb->dev, 0, NULL); > + > + return syn_proxy_mangle_pkt(skb, iph, th, state->seq_diff + 1); > + } > + > + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { > + __be32 newseq; > + > + newseq = htonl(ntohl(th->seq) + state->seq_diff); > + inet_proto_csum_replace4(&th->check, skb, th->seq, newseq, 0); > + pr_debug("alter seq: %u -> %u\n", ntohl(th->seq), > + ntohl(newseq)); > + th->seq = newseq; > + } > + > + return NF_ACCEPT; > +} > + > +static unsigned int tcp_process(struct sk_buff *skb) > +{ > + const struct iphdr *iph; > + const struct tcphdr *th; > + int err; > + u16 mss; > + > + iph = ip_hdr(skb); > + if (iph->frag_off & htons(IP_OFFSET)) > + goto out; > + if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(*th))) > + goto out; > + th = (const struct tcphdr *)(skb->data + iph->ihl * 4); > + if ((tcp_flag_byte(th) & > + (TCPHDR_FIN | TCPHDR_RST | TCPHDR_ACK | TCPHDR_SYN)) != TCPHDR_SYN) > + goto out; > + > + if (nf_ip_checksum(skb, NF_INET_PRE_ROUTING, iph->ihl * 4, IPPROTO_TCP)) > + goto out; > + mss = 0; > + if (th->doff > sizeof(*th) / 4) { > + if (!pskb_may_pull(skb, (iph->ihl + th->doff) * 4)) > + goto out; > + err = get_mss((u8 *)(th + 1), th->doff * 4 - sizeof(*th)); > + if (err < 0) > + goto out; > + if (err != 0) > + mss = err; > + } else if (th->doff != sizeof(*th) / 4) > + goto out; > + > + tcp_send(iph->daddr, iph->saddr, th->dest, th->source, 0, > + ntohl(th->seq) + 1, 0, mss, TCPHDR_SYN | TCPHDR_ACK, > + iph->tos, skb->dev, > + TCP_SEND_FLAG_NOTRACE | TCP_SEND_FLAG_SYNCOOKIE, skb); > + > + return NF_STOLEN; > + > +out: > + return NF_DROP; > +} > + > +static unsigned int synproxy_tg(struct sk_buff *skb, > + const struct xt_action_param *par) > +{ > + struct nf_conn *ct; > + enum ip_conntrack_info ctinfo; > + int ret; > + > + 
/* received from lo */ > + ct = nf_ct_get(skb, &ctinfo); > + if (ct) > + return IPT_CONTINUE; > + > + local_bh_disable(); > + if (!__get_cpu_var(syn_proxy_state).seq_inited) > + ret = tcp_process(skb); > + else > + ret = IPT_CONTINUE; > + local_bh_enable(); > + > + return ret; > +} > + > +static int synproxy_tg_check(const struct xt_tgchk_param *par) > +{ > + int ret; > + > + ret = nf_ct_l3proto_try_module_get(par->family); > + if (ret < 0) > + pr_info("cannot load conntrack support for proto=%u\n", > + par->family); > + > + return ret; > +} > + > +static void synproxy_tg_destroy(const struct xt_tgdtor_param *par) > +{ > + nf_ct_l3proto_module_put(par->family); > +} > + > +static struct xt_target synproxy_tg_reg __read_mostly = { > + .name = "SYNPROXY", > + .family = NFPROTO_IPV4, > + .target = synproxy_tg, > + .table = "raw", > + .hooks = 1 << NF_INET_PRE_ROUTING, > + .proto = IPPROTO_TCP, > + .checkentry = synproxy_tg_check, > + .destroy = synproxy_tg_destroy, > + .me = THIS_MODULE, > +}; > + > +static struct nf_ct_ext_type syn_proxy_state_ext __read_mostly = { > + .len = sizeof(struct syn_proxy_state), > + .align = __alignof__(struct syn_proxy_state), > + .id = NF_CT_EXT_SYNPROXY, > +}; > + > +static int __init synproxy_tg_init(void) > +{ > + int err; > + > + rcu_assign_pointer(syn_proxy_pre_hook, syn_proxy_pre); > + rcu_assign_pointer(syn_proxy_post_hook, syn_proxy_post); > + err = nf_ct_extend_register(&syn_proxy_state_ext); > + if (err) > + goto err_out; > + err = xt_register_target(&synproxy_tg_reg); > + if (err) > + goto err_out2; > + > + return err; > + > +err_out2: > + nf_ct_extend_unregister(&syn_proxy_state_ext); > +err_out: > + rcu_assign_pointer(syn_proxy_post_hook, NULL); > + rcu_assign_pointer(syn_proxy_pre_hook, NULL); > + rcu_barrier(); > + > + return err; > +} > + > +static void __exit synproxy_tg_exit(void) > +{ > + xt_unregister_target(&synproxy_tg_reg); > + nf_ct_extend_unregister(&syn_proxy_state_ext); > + 
rcu_assign_pointer(syn_proxy_post_hook, NULL); > + rcu_assign_pointer(syn_proxy_pre_hook, NULL); > + rcu_barrier(); > +} > + > +module_init(synproxy_tg_init); > +module_exit(synproxy_tg_exit); > -- > To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html