On Mon, Mar 18, 2024 at 10:39:15AM +0100, Pablo Neira Ayuso wrote: > In case that either FIN or RST packet is seen, infer current TCP state > based on the TCP packet flags before setting the _TEARDOWN flag: > > - FIN packets result in TCP_CONNTRACK_FIN_WAIT which uses a default > timeout of 2 minutes. > - RST packets lead to tcp_state TCP_CONNTRACK_CLOSE of 10 seconds. > > Therefore, TCP established state with a low timeout is not used anymore > when handing over the flow to the classic conntrack path, otherwise a > FIN packet coming in the reply direction could re-offload this flow > again. > > If flow teardown is required for other reasons, eg. no traffic seen > after NF_FLOW_TIMEOUT, then use TCP established state but set timeout to > TCP_CONNTRACK_UNACK state which is used in conntrack to pick up > connections from the middle (default is 5 minutes). > > Fixes: e5eaac2beb54 ("netfilter: flowtable: fix TCP flow teardown") > Signed-off-by: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx> > --- > Compile-tested only. Thanks Pablo, I will test both patches on a production system this week and give you feedback. Both patches look good to me though and should fix the timeout problem. 
> > include/net/netfilter/nf_flow_table.h | 1 + > net/netfilter/nf_flow_table_core.c | 45 +++++++++++++++++++++------ > net/netfilter/nf_flow_table_ip.c | 2 +- > 3 files changed, 37 insertions(+), 11 deletions(-) > > diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h > index a763dd327c6e..924f3720143f 100644 > --- a/include/net/netfilter/nf_flow_table.h > +++ b/include/net/netfilter/nf_flow_table.h > @@ -293,6 +293,7 @@ int nf_flow_table_init(struct nf_flowtable *flow_table); > void nf_flow_table_free(struct nf_flowtable *flow_table); > > void flow_offload_teardown(struct flow_offload *flow); > +void flow_offload_teardown_tcp(struct flow_offload *flow, bool fin); > > void nf_flow_snat_port(const struct flow_offload *flow, > struct sk_buff *skb, unsigned int thoff, > diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c > index a0571339239c..481fe3d96bbc 100644 > --- a/net/netfilter/nf_flow_table_core.c > +++ b/net/netfilter/nf_flow_table_core.c > @@ -165,10 +165,22 @@ void flow_offload_route_init(struct flow_offload *flow, > } > EXPORT_SYMBOL_GPL(flow_offload_route_init); > > -static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) > +static s32 flow_offload_fixup_tcp(struct net *net, struct nf_conn *ct, > + enum tcp_conntrack tcp_state) > { > - tcp->seen[0].td_maxwin = 0; > - tcp->seen[1].td_maxwin = 0; > + struct nf_tcp_net *tn = nf_tcp_pernet(net); > + > + ct->proto.tcp.state = tcp_state; > + ct->proto.tcp.seen[0].td_maxwin = 0; > + ct->proto.tcp.seen[1].td_maxwin = 0; > + > + /* Similar to mid-connection pickup with loose=1. > + * Avoid large ESTABLISHED timeout. 
> + */ > + if (tcp_state == TCP_CONNTRACK_ESTABLISHED) > + return tn->timeouts[TCP_CONNTRACK_UNACK]; > + > + return tn->timeouts[tcp_state]; > } > > static void flow_offload_fixup_ct(struct nf_conn *ct) > @@ -178,12 +190,8 @@ static void flow_offload_fixup_ct(struct nf_conn *ct) > s32 timeout; > > if (l4num == IPPROTO_TCP) { > - struct nf_tcp_net *tn = nf_tcp_pernet(net); > - > - flow_offload_fixup_tcp(&ct->proto.tcp); > - > - timeout = tn->timeouts[ct->proto.tcp.state]; > - timeout -= tn->offload_timeout; > + timeout = flow_offload_fixup_tcp(net, ct, > + TCP_CONNTRACK_ESTABLISHED); > } else if (l4num == IPPROTO_UDP) { > struct nf_udp_net *tn = nf_udp_pernet(net); > enum udp_conntrack state = > @@ -346,12 +354,29 @@ static void flow_offload_del(struct nf_flowtable *flow_table, > > void flow_offload_teardown(struct flow_offload *flow) > { > + flow_offload_fixup_ct(flow->ct); > + smp_mb__before_atomic(); > clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); > set_bit(NF_FLOW_TEARDOWN, &flow->flags); > - flow_offload_fixup_ct(flow->ct); > } > EXPORT_SYMBOL_GPL(flow_offload_teardown); > > +void flow_offload_teardown_tcp(struct flow_offload *flow, bool fin) > +{ > + enum tcp_conntrack tcp_state; > + > + if (fin) > + tcp_state = TCP_CONNTRACK_FIN_WAIT; > + else /* rst */ > + tcp_state = TCP_CONNTRACK_CLOSE; > + > + flow_offload_fixup_tcp(nf_ct_net(flow->ct), flow->ct, tcp_state); > + smp_mb__before_atomic(); > + clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); > + set_bit(NF_FLOW_TEARDOWN, &flow->flags); > +} > +EXPORT_SYMBOL_GPL(flow_offload_teardown_tcp); > + > struct flow_offload_tuple_rhash * > flow_offload_lookup(struct nf_flowtable *flow_table, > struct flow_offload_tuple *tuple) > diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c > index e45fade76409..13b6c453d8bc 100644 > --- a/net/netfilter/nf_flow_table_ip.c > +++ b/net/netfilter/nf_flow_table_ip.c > @@ -29,7 +29,7 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto, > 
> tcph = (void *)(skb_network_header(skb) + thoff); > if (unlikely(tcph->fin || tcph->rst)) { > - flow_offload_teardown(flow); > + flow_offload_teardown_tcp(flow, tcph->fin); > return -1; > } > > -- > 2.30.2 >