Re: [PATCH 2/3] set SEEN_REPLY before destroying conntrack on TCP RST

Patrick McHardy <kaber@xxxxxxxxx> · Tue, 27 May 2008 06:53:22 +0200

Fabian Hugelshofer wrote:
On Thu, 2008-05-22 at 10:13 +0100, Fabian Hugelshofer wrote:
If a connection fails with a TCP reset, the conntrack is destroyed 
immediately. This patch sets the SEEN_REPLY bit before destroying the 
conntrack.

This updated version also increments the accounting counters.

Thanks, but this needs to be changed slightly.

Do I see this right, that the commits should be observable in the
patch-o-matic git, if they are done?

No, they go in my nf-next-2.6.git tree. I'm uploading it
to (takes about 15 minutes):

git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6.git

--- linux-2.6.25.4.orig/net/netfilter/nf_conntrack_proto_tcp.c	2008-05-26 17:43:01.000000000 +0100
+++ linux-2.6.25.4/net/netfilter/nf_conntrack_proto_tcp.c	2008-05-26 17:32:17.000000000 +0100
@@ -22,6 +22,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_log.h>
@@ -960,8 +961,19 @@
 		/* If only reply is a RST, we can consider ourselves not to
 		   have an established connection: this is a fairly common
 		   problem case, so we can delete the conntrack
-		   immediately.  --RR */
+		   immediately.  --RR
+		   The SEEN_REPLY bit and the accounting counters are updated
+		   here to have the correct information in the ct event. */
 		if (th->rst) {
+			if (ctinfo >= IP_CT_IS_REPLY)
+				set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
+#ifdef CONFIG_NF_CT_ACCT
+			spin_lock_bh(&nf_conntrack_lock);
> +			ct->counters[CTINFO2DIR(ctinfo)].packets++;
> +			ct->counters[CTINFO2DIR(ctinfo)].bytes +=

I'm trying to get rid of nf_conntrack_lock uses outside of the
connection tracking core, so this is a move in the wrong direction.
Also the same needs to be done for ICMP, DCCP, ICMPv6 and possibly
more.

+				skb->len - skb_network_offset(skb);
+			spin_unlock_bh(&nf_conntrack_lock);
+#endif
 			if (del_timer(&ct->timeout))
 				ct->timeout.function((unsigned long)ct);

I think a better way is to encapsulate the del_timer/timeout.function
calls in a nf_ct_kill() function and perform accounting there.
Since all manual invocations of timeout.function are/should be
performed only while handling packets (that are usually not
accounted), this seems like the right way.

The attached patch (also queued in the tree mentioned above) does
the encapsulation. Please split your patch into two parts
(SEEN_REPLY and accounting) and add a Signed-off-by: line.


commit bb2d3ddb8fe46f387eeb92220050dcd76e5b6071
Author: Patrick McHardy <kaber@xxxxxxxxx>
Date:   Tue May 27 06:46:40 2008 +0200

    [NETFILTER]: nf_conntrack: add nf_ct_kill()
    
    Encapsulate the common
    
    	if (del_timer(&ct->timeout))
    		ct->timeout.function((unsigned long)ct)
    
    sequence in a new function.
    
    Signed-off-by: Patrick McHardy <kaber@xxxxxxxxx>

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 2dbd6c0..fc19ab2 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -223,6 +223,8 @@ static inline void nf_ct_refresh(struct nf_conn *ct,
 	__nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
 }
 
+extern void nf_ct_kill(struct nf_conn *ct);
+
 /* These are for NAT.  Icky. */
 /* Update TCP window tracking data when NAT mangles the packet */
 extern void nf_conntrack_tcp_update(const struct sk_buff *skb,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 78ab19a..0e21a46 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -87,9 +87,8 @@ static int icmp_packet(struct nf_conn *ct,
 	   means this will only run once even if count hits zero twice
 	   (theoretically possible with SMP) */
 	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
-		if (atomic_dec_and_test(&ct->proto.icmp.count)
-		    && del_timer(&ct->timeout))
-			ct->timeout.function((unsigned long)ct);
+		if (atomic_dec_and_test(&ct->proto.icmp.count))
+			nf_ct_kill(ct);
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
 		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index ee713b0..fe081b9 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -89,9 +89,8 @@ static int icmpv6_packet(struct nf_conn *ct,
 	   means this will only run once even if count hits zero twice
 	   (theoretically possible with SMP) */
 	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
-		if (atomic_dec_and_test(&ct->proto.icmp.count)
-		    && del_timer(&ct->timeout))
-			ct->timeout.function((unsigned long)ct);
+		if (atomic_dec_and_test(&ct->proto.icmp.count))
+			nf_ct_kill(ct);
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
 		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c4b1799..79b07c3 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -848,6 +848,13 @@ acct:
 }
 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 
+void nf_ct_kill(struct nf_conn *ct)
+{
+	if (del_timer(&ct->timeout))
+		ct->timeout.function((unsigned long)ct);
+}
+EXPORT_SYMBOL_GPL(nf_ct_kill);
+
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 
 #include <linux/netfilter/nfnetlink.h>
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 13918c1..ab655f6 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -812,9 +812,8 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			return -ENOENT;
 		}
 	}
-	if (del_timer(&ct->timeout))
-		ct->timeout.function((unsigned long)ct);
 
+	nf_ct_kill(ct);
 	nf_ct_put(ct);
 
 	return 0;
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index afb4a18..223742f 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -475,8 +475,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 	if (type == DCCP_PKT_RESET &&
 	    !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
 		/* Tear down connection immediately if only reply is a RESET */
-		if (del_timer(&ct->timeout))
-			ct->timeout.function((unsigned long)ct);
+		nf_ct_kill(ct);
 		return NF_ACCEPT;
 	}
 
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index ba94004..c4aa11e 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -843,8 +843,7 @@ static int tcp_packet(struct nf_conn *ct,
 			/* Attempt to reopen a closed/aborted connection.
 			 * Delete this connection and look up again. */
 			write_unlock_bh(&tcp_lock);
-			if (del_timer(&ct->timeout))
-				ct->timeout.function((unsigned long)ct);
+			nf_ct_kill(ct);
 			return -NF_REPEAT;
 		}
 		/* Fall through */
@@ -877,8 +876,7 @@ static int tcp_packet(struct nf_conn *ct,
 			if (LOG_INVALID(IPPROTO_TCP))
 				nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 					  "nf_ct_tcp: killing out of sync session ");
-			if (del_timer(&ct->timeout))
-				ct->timeout.function((unsigned long)ct);
+			nf_ct_kill(ct);
 			return -NF_DROP;
 		}
 		ct->proto.tcp.last_index = index;
@@ -961,8 +959,7 @@ static int tcp_packet(struct nf_conn *ct,
 		   problem case, so we can delete the conntrack
 		   immediately.  --RR */
 		if (th->rst) {
-			if (del_timer(&ct->timeout))
-				ct->timeout.function((unsigned long)ct);
+			nf_ct_kill(ct);
 			return NF_ACCEPT;
 		}
 	} else if (!test_bit(IPS_ASSURED_BIT, &ct->status)