Hi Patrick, Please consider applying and submitting the patch below. Vitezslav Samel discovered that since 2.6.30.4+ active FTP can not work over NAT. The "cause" of the problem was a fix of unacknowledged data detection with NAT (commit a3a9f79e361e864f0e9d75ebe2a0cb43d17c4272). However, actually, that fix uncovered a long standing bug in TCP conntrack: when NAT was enabled, we simply updated the max of the right edge of the segments we have seen (td_end), by the offset NAT produced with changing IP/port in the data. However, we did not update the other parameter (td_maxend) which is affected by the NAT offset. Thus that could drift away from the correct value and thus resulted breaking active FTP. The patch below fixes the issue by *not* updating the conntrack parameters from NAT, but instead taking into account the NAT offsets in conntrack in a consistent way. (Updating from NAT would be more harder and expensive because it'd need to re-calculate parameters we already calculated in conntrack.) Signed-off-by: Jozsef Kadlecsik <kadlec@xxxxxxxxxxxxxxxxx> diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5d9a848..a96b835 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -252,11 +252,9 @@ static inline bool nf_ct_kill(struct nf_conn *ct) } /* These are for NAT. Icky. */ -/* Update TCP window tracking data when NAT mangles the packet */ -extern void nf_conntrack_tcp_update(const struct sk_buff *skb, - unsigned int dataoff, - struct nf_conn *ct, int dir, - s16 offset); +extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq); /* Fake conntrack entry for untracked connections */ extern struct nf_conn nf_conntrack_untracked; diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index 237a961..4222220 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -32,4 +32,8 @@ extern int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, * to port ct->master->saved_proto. */ extern void nf_nat_follow_master(struct nf_conn *ct, struct nf_conntrack_expect *this); + +extern s16 nf_nat_get_offset(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq); #endif diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 3229e0a..a622a83 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -750,6 +750,8 @@ static int __init nf_nat_init(void) BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, nfnetlink_parse_nat_setup); + BUG_ON(nf_ct_nat_offset != NULL); + rcu_assign_pointer(nf_ct_nat_offset, nf_nat_get_offset); return 0; cleanup_extend: @@ -764,6 +766,7 @@ static void __exit nf_nat_cleanup(void) nf_ct_extend_unregister(&nat_extend); rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL); + rcu_assign_pointer(nf_ct_nat_offset, NULL); synchronize_net(); } diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 05ede41..79e28ad 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -73,6 +73,28 @@ adjust_tcp_sequence(u32 seq, DUMP_OFFSET(this_way); } +/* Get the offset value, for conntrack */ +s16 nf_nat_get_offset(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq) +{ + struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_nat_seq *this_way; + s16 offset; + + if (!nat) + return 0; + + this_way = &nat->seq[dir]; + spin_lock_bh(&nf_nat_seqofs_lock); + offset = after(seq, this_way->correction_pos) + ? this_way->offset_after : this_way->offset_before; + spin_unlock_bh(&nf_nat_seqofs_lock); + + return offset; +} +EXPORT_SYMBOL_GPL(nf_nat_get_offset); + /* Frobs data inside this packet, which is linear. */ static void mangle_contents(struct sk_buff *skb, unsigned int dataoff, @@ -189,11 +211,6 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, adjust_tcp_sequence(ntohl(tcph->seq), (int)rep_len - (int)match_len, ct, ctinfo); - /* Tell TCP window tracking about seq change */ - nf_conntrack_tcp_update(skb, ip_hdrlen(skb), - ct, CTINFO2DIR(ctinfo), - (int)rep_len - (int)match_len); - nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); } return 1; @@ -415,12 +432,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, tcph->seq = newseq; tcph->ack_seq = newack; - if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo)) - return 0; - - nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir, seqoff); - - return 1; + return nf_nat_sack_adjust(skb, tcph, ct, ctinfo); } /* Setup NAT on this expected conntrack so it follows master. */ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0d961ee..f1e0c08 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1296,6 +1296,11 @@ err_stat: return ret; } +s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq); +EXPORT_SYMBOL_GPL(nf_ct_nat_offset); + int nf_conntrack_init(struct net *net) { int ret; @@ -1313,6 +1318,9 @@ int nf_conntrack_init(struct net *net) /* For use by REJECT target */ rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); + + /* Howto get NAT offsets */ + rcu_assign_pointer(nf_ct_nat_offset, NULL); } return 0; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index a38bc22..bea9428 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -482,6 +482,21 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, } } +#ifdef CONFIG_NF_NAT_NEEDED +static inline s16 nat_offset(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq) +{ + typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset); + + return get_offset != NULL ? get_offset(ct, dir, seq) : 0; +} +#define NAT_OFFSET(pf, ct, dir, seq) \ + (pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0) +#else +#define NAT_OFFSET(pf, ct, dir, seq) 0 +#endif + static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp *state, enum ip_conntrack_dir dir, @@ -496,6 +511,7 @@ static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; + s16 receiver_offset; bool res; /* @@ -509,11 +525,16 @@ static bool tcp_in_window(const struct nf_conn *ct, if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) tcp_sack(skb, dataoff, tcph, &sack); + /* Take into account NAT sequence number mangling */ + receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1); + ack -= receiver_offset; + sack -= receiver_offset; + pr_debug("tcp_in_window: START\n"); pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); - pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n", - seq, ack, sack, win, end); + pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", + seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, @@ -599,8 +620,8 @@ static bool tcp_in_window(const struct nf_conn *ct, pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); - pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n", - seq, ack, sack, win, end); + pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", + seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, @@ -686,7 +707,7 @@ static bool tcp_in_window(const struct nf_conn *ct, before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? before(sack, receiver->td_end + 1) ? - after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" + after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG" : "ACK is under the lower bound (possible overly delayed ACK)" : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" @@ -701,39 +722,6 @@ static bool tcp_in_window(const struct nf_conn *ct, return res; } -#ifdef CONFIG_NF_NAT_NEEDED -/* Update sender->td_end after NAT successfully mangled the packet */ -/* Caller must linearize skb at tcp header. */ -void nf_conntrack_tcp_update(const struct sk_buff *skb, - unsigned int dataoff, - struct nf_conn *ct, int dir, - s16 offset) -{ - const struct tcphdr *tcph = (const void *)skb->data + dataoff; - const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir]; - const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[!dir]; - __u32 end; - - end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); - - write_lock_bh(&tcp_lock); - /* - * We have to worry for the ack in the reply packet only... - */ - if (ct->proto.tcp.seen[dir].td_end + offset == end) - ct->proto.tcp.seen[dir].td_end = end; - ct->proto.tcp.last_end = end; - write_unlock_bh(&tcp_lock); - pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); -} -EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update); -#endif - #define TH_FIN 0x01 #define TH_SYN 0x02 #define TH_RST 0x04 Best regards, Jozsef - E-mail : kadlec@xxxxxxxxxxxxxxxxx, kadlec@xxxxxxxxxxxx PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt Address : KFKI Research Institute for Particle and Nuclear Physics H-1525 Budapest 114, POB. 49, Hungary -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html