[PATCH net-next 3/8] netfilter: introduce max count of hw offloaded flow table entries

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



To improve hardware offload debuggability extend struct netns_nftables with
'max_hw' counter and expose it to userspace as 'nf_flowtable_max_hw' sysctl
entry. Verify that count_hw value is less than max_hw and don't offload new
flow table entry to hardware, if this is not the case. Flows that were not
offloaded due to count_hw being larger than set maximum can still be
offloaded via refresh function. Mark such flows with NF_FLOW_HW bit and
only count them once by checking that the bit was previously not set.

Signed-off-by: Vlad Buslov <vladbu@xxxxxxxxxx>
Signed-off-by: Oz Shlomo <ozsh@xxxxxxxxxx>
Reviewed-by: Paul Blakey <paulb@xxxxxxxxxx>
---
 include/net/netns/nftables.h            |  1 +
 net/netfilter/nf_conntrack_standalone.c | 11 +++++++++++
 net/netfilter/nf_flow_table_core.c      | 25 +++++++++++++++++++++++--
 3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 262b8b3213cb..5677f21fdd4c 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -7,6 +7,7 @@
 struct netns_nftables {
 	u8			gencursor;
 	atomic_t		count_hw;
+	int			max_hw;
 };
 
 #endif
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 8cd55d502ffd..af0dea471119 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -620,6 +620,7 @@ enum nf_ct_sysctl_index {
 #endif
 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 	NF_SYSCTL_CT_COUNT_HW,
+	NF_SYSCTL_CT_MAX_HW,
 #endif
 
 	__NF_SYSCTL_CT_LAST_SYSCTL,
@@ -984,6 +985,12 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
 	},
+	[NF_SYSCTL_CT_MAX_HW] = {
+		.procname	= "nf_flowtable_max_hw",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #endif
 	{}
 };
@@ -1123,6 +1130,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
 	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
 	table[NF_SYSCTL_CT_COUNT_HW].data = &net->nft.count_hw;
+	table[NF_SYSCTL_CT_MAX_HW].data = &net->nft.max_hw;
 #endif
 
 	nf_conntrack_standalone_init_tcp_sysctl(net, table);
@@ -1135,6 +1143,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 		table[NF_SYSCTL_CT_MAX].mode = 0444;
 		table[NF_SYSCTL_CT_EXPECT_MAX].mode = 0444;
 		table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+		table[NF_SYSCTL_CT_MAX_HW].mode = 0444;
+#endif
 	}
 
 	cnet->sysctl_header = register_net_sysctl(net, "net/netfilter", table);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 9f2b68bc6f80..2631bd0ae9ae 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -290,6 +290,20 @@ unsigned long flow_offload_get_timeout(struct flow_offload *flow)
 	return timeout;
 }
 
+static bool flow_offload_inc_count_hw(struct nf_flowtable *flow_table)
+{
+	struct net *net = read_pnet(&flow_table->net);
+	int max_hw = net->nft.max_hw, count_hw;
+
+	count_hw = atomic_inc_return(&net->nft.count_hw);
+	if (max_hw && count_hw > max_hw) {
+		atomic_dec(&net->nft.count_hw);
+		return false;
+	}
+
+	return true;
+}
+
 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
 {
 	int err;
@@ -315,9 +329,9 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
 	nf_ct_offload_timeout(flow->ct);
 
 	if (nf_flowtable_hw_offload(flow_table)) {
-		struct net *net = read_pnet(&flow_table->net);
+		if (!flow_offload_inc_count_hw(flow_table))
+			return 0;
 
-		atomic_inc(&net->nft.count_hw);
 		__set_bit(NF_FLOW_HW, &flow->flags);
 		nf_flow_offload_add(flow_table, flow);
 	}
@@ -329,6 +343,7 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
 void flow_offload_refresh(struct nf_flowtable *flow_table,
 			  struct flow_offload *flow)
 {
+	struct net *net = read_pnet(&flow_table->net);
 	u32 timeout;
 
 	timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
@@ -338,6 +353,12 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
 	if (likely(!nf_flowtable_hw_offload(flow_table)))
 		return;
 
+	if (!flow_offload_inc_count_hw(flow_table))
+		return;
+	/* only count each flow once when setting NF_FLOW_HW bit */
+	if (test_and_set_bit(NF_FLOW_HW, &flow->flags))
+		atomic_dec(&net->nft.count_hw);
+
 	nf_flow_offload_add(flow_table, flow);
 }
 EXPORT_SYMBOL_GPL(flow_offload_refresh);
-- 
2.31.1




[Index of Archives]     [Netfitler Users]     [Berkeley Packet Filter]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux