[PATCH] nf_flowtable: teardown fix race condition

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The nf flowtable teardown forces a tcp state into established state
with the corresponding timeout and is in a race condition with
the conntrack code.
This might happen even though the state is already in a CLOSE or
FIN WAIT state and about to be closed.
In order to process the correct state, a TCP connection needs to be
set to established in the flowtable software and hardware case.
Also this is a bit optimistic as we actually do not check for the
3 way handshake ACK at this point, we do not really have a choice.

This is also fixing a race condition between the ct gc code
and the flowtable teardown where the ct might already be removed
when the flowtable teardown code runs.

Signed-off-by: Sven Auhagen <sven.auhagen@xxxxxxxxxxxx>

diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 87a7388b6c89..898ea2fc833e 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -5,6 +5,7 @@
 #include <linux/netfilter.h>
 #include <linux/rhashtable.h>
 #include <linux/netdevice.h>
+#include <linux/spinlock.h>
 #include <net/ip.h>
 #include <net/ip6_route.h>
 #include <net/netfilter/nf_tables.h>
@@ -171,30 +172,32 @@ int flow_offload_route_init(struct flow_offload *flow,
 }
 EXPORT_SYMBOL_GPL(flow_offload_route_init);
 
-static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
-{
-	tcp->state = TCP_CONNTRACK_ESTABLISHED;
-	tcp->seen[0].td_maxwin = 0;
-	tcp->seen[1].td_maxwin = 0;
-}
 
-static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
+static void flow_offload_fixup_ct(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 	int l4num = nf_ct_protonum(ct);
 	s32 timeout;
 
+	spin_lock_bh(&ct->lock);
+
 	if (l4num == IPPROTO_TCP) {
-		struct nf_tcp_net *tn = nf_tcp_pernet(net);
+		ct->proto.tcp.seen[0].td_maxwin = 0;
+		ct->proto.tcp.seen[1].td_maxwin = 0;
 
-		timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
-		timeout -= tn->offload_timeout;
+		if (nf_conntrack_tcp_established(ct)) {
+			struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+			timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
+			timeout -= tn->offload_timeout;
+		}
 	} else if (l4num == IPPROTO_UDP) {
 		struct nf_udp_net *tn = nf_udp_pernet(net);
 
 		timeout = tn->timeouts[UDP_CT_REPLIED];
 		timeout -= tn->offload_timeout;
 	} else {
+		spin_unlock_bh(&ct->lock);
 		return;
 	}
 
@@ -203,18 +206,8 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
 
 	if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
 		ct->timeout = nfct_time_stamp + timeout;
-}
 
-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
-{
-	if (nf_ct_protonum(ct) == IPPROTO_TCP)
-		flow_offload_fixup_tcp(&ct->proto.tcp);
-}
-
-static void flow_offload_fixup_ct(struct nf_conn *ct)
-{
-	flow_offload_fixup_ct_state(ct);
-	flow_offload_fixup_ct_timeout(ct);
+	spin_unlock_bh(&ct->lock);
 }
 
 static void flow_offload_route_release(struct flow_offload *flow)
@@ -354,12 +347,9 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
 			       nf_flow_offload_rhash_params);
 
-	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
+	flow_offload_fixup_ct(flow->ct);
 
-	if (nf_flow_has_expired(flow))
-		flow_offload_fixup_ct(flow->ct);
-	else
-		flow_offload_fixup_ct_timeout(flow->ct);
+	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
 
 	flow_offload_free(flow);
 }
@@ -367,8 +357,6 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
 void flow_offload_teardown(struct flow_offload *flow)
 {
 	set_bit(NF_FLOW_TEARDOWN, &flow->flags);
-
-	flow_offload_fixup_ct_state(flow->ct);
 }
 EXPORT_SYMBOL_GPL(flow_offload_teardown);
 
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 889cf88d3dba..990128cb7a61 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -10,6 +10,7 @@
 #include <linux/if_ether.h>
 #include <linux/if_pppox.h>
 #include <linux/ppp_defs.h>
+#include <linux/spinlock.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
@@ -34,6 +35,13 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
 		return -1;
 	}
 
+	if (unlikely(!test_bit(IPS_ASSURED_BIT, &flow->ct->status))) {
+		spin_lock_bh(&flow->ct->lock);
+		flow->ct->proto.tcp.state = TCP_CONNTRACK_ESTABLISHED;
+		spin_unlock_bh(&flow->ct->lock);
+		set_bit(IPS_ASSURED_BIT, &flow->ct->status);
+	}
+
 	return 0;
 }
 
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index b561e0a44a45..63bf1579e75f 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -5,6 +5,7 @@
 #include <linux/rhashtable.h>
 #include <linux/netdevice.h>
 #include <linux/tc_act/tc_csum.h>
+#include <linux/spinlock.h>
 #include <net/flow_offload.h>
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_tables.h>
@@ -953,11 +954,22 @@ static void flow_offload_work_stats(struct flow_offload_work *offload)
 static void flow_offload_work_handler(struct work_struct *work)
 {
 	struct flow_offload_work *offload;
+	struct flow_offload_tuple *tuple;
+	struct flow_offload *flow;
 
 	offload = container_of(work, struct flow_offload_work, work);
 	switch (offload->cmd) {
 		case FLOW_CLS_REPLACE:
 			flow_offload_work_add(offload);
+			/* Set the TCP connection to established or teardown does not work */
+			flow = offload->flow;
+			tuple = &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple;
+			if (tuple->l4proto == IPPROTO_TCP && !test_bit(IPS_ASSURED_BIT, &flow->ct->status)) {
+				spin_lock_bh(&flow->ct->lock);
+				flow->ct->proto.tcp.state = TCP_CONNTRACK_ESTABLISHED;
+				spin_unlock_bh(&flow->ct->lock);
+				set_bit(IPS_ASSURED_BIT, &flow->ct->status);
+			}
 			break;
 		case FLOW_CLS_DESTROY:
 			flow_offload_work_del(offload);
-- 
2.33.1




[Index of Archives]     [Netfitler Users]     [Berkeley Packet Filter]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux