[PATCH nf 2/2] netfilter: nft_set_rbtree: remove async GC

Remove the async GC worker and let sync GC remove expired entries from
the tree instead. The read spinlock taken by sync GC might still stall
an update from the control plane if the tree is sufficiently large.

Fixes: 96b33300fba8 ("netfilter: nft_set_rbtree: use read spinlock to avoid datapath contention")
Signed-off-by: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx>
---
Hi Florian,

You asked for this.
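
For context, here is a minimal sketch of what queueing an expired
interval on the remaining synchronous GC path could look like, using
the GC transaction helpers that the removed async worker also relies
on. This is an illustration only, not code from this patch: the
sync-side helpers (nft_trans_gc_queue_sync(), and the hand-off via
nft_trans_gc_queue_sync_done() mentioned afterwards) and the wrapper
nft_rbtree_gc_expired() below are assumptions modelled on the async
counterparts being deleted.

/* Illustrative sketch only, not part of this patch: queue one expired
 * interval (start element plus optional end element) on a sync GC
 * transaction. Mirrors the async logic removed below; the helper name
 * nft_rbtree_gc_expired() is hypothetical.
 */
static struct nft_trans_gc *
nft_rbtree_gc_expired(struct nft_trans_gc *gc,
		      struct nft_rbtree_elem *rbe_start,
		      struct nft_rbtree_elem *rbe_end)
{
	/* Mark the element(s) dead so the datapath skips them before
	 * the GC transaction releases them.
	 */
	nft_set_elem_dead(&rbe_start->ext);
	if (rbe_end)
		nft_set_elem_dead(&rbe_end->ext);

	/* Grow the batch if needed, then record the start element. */
	gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
	if (!gc)
		return NULL;
	nft_trans_gc_elem_add(gc, rbe_start);

	if (rbe_end) {
		gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
		if (!gc)
			return NULL;
		nft_trans_gc_elem_add(gc, rbe_end);
	}

	return gc;
}

As with the async variant removed below, the caller would presumably
hand off the filled batch with nft_trans_gc_queue_sync_done() once the
tree traversal ends, analogous to nft_trans_gc_queue_async_done() in
the deleted code.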

 net/netfilter/nft_set_rbtree.c | 90 ----------------------------------
 1 file changed, 90 deletions(-)

diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index de6d812fc790..17500f4f6f1d 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -19,7 +19,6 @@ struct nft_rbtree {
 	struct rb_root		root;
 	rwlock_t		lock;
 	seqcount_rwlock_t	count;
-	struct delayed_work	gc_work;
 };
 
 struct nft_rbtree_elem {
@@ -626,89 +625,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 	read_unlock_bh(&priv->lock);
 }
 
-static void nft_rbtree_gc(struct work_struct *work)
-{
-	struct nft_rbtree_elem *rbe, *rbe_end = NULL;
-	struct nftables_pernet *nft_net;
-	struct nft_rbtree *priv;
-	struct nft_trans_gc *gc;
-	struct rb_node *node;
-	struct nft_set *set;
-	unsigned int gc_seq;
-	struct net *net;
-
-	priv = container_of(work, struct nft_rbtree, gc_work.work);
-	set  = nft_set_container_of(priv);
-	net  = read_pnet(&set->net);
-	nft_net = nft_pernet(net);
-	gc_seq  = READ_ONCE(nft_net->gc_seq);
-
-	if (nft_set_gc_is_pending(set))
-		goto done;
-
-	gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
-	if (!gc)
-		goto done;
-
-	read_lock_bh(&priv->lock);
-	for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
-
-		/* Ruleset has been updated, try later. */
-		if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
-			nft_trans_gc_destroy(gc);
-			gc = NULL;
-			goto try_later;
-		}
-
-		rbe = rb_entry(node, struct nft_rbtree_elem, node);
-
-		if (nft_set_elem_is_dead(&rbe->ext))
-			goto dead_elem;
-
-		/* elements are reversed in the rbtree for historical reasons,
-		 * from highest to lowest value, that is why end element is
-		 * always visited before the start element.
-		 */
-		if (nft_rbtree_interval_end(rbe)) {
-			rbe_end = rbe;
-			continue;
-		}
-		if (!nft_set_elem_expired(&rbe->ext))
-			continue;
-
-		nft_set_elem_dead(&rbe->ext);
-
-		if (!rbe_end)
-			continue;
-
-		nft_set_elem_dead(&rbe_end->ext);
-
-		gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
-		if (!gc)
-			goto try_later;
-
-		nft_trans_gc_elem_add(gc, rbe_end);
-		rbe_end = NULL;
-dead_elem:
-		gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
-		if (!gc)
-			goto try_later;
-
-		nft_trans_gc_elem_add(gc, rbe);
-	}
-
-	gc = nft_trans_gc_catchall_async(gc, gc_seq);
-
-try_later:
-	read_unlock_bh(&priv->lock);
-
-	if (gc)
-		nft_trans_gc_queue_async_done(gc);
-done:
-	queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
-			   nft_set_gc_interval(set));
-}
-
 static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
 			       const struct nft_set_desc *desc)
 {
@@ -725,11 +641,6 @@ static int nft_rbtree_init(const struct nft_set *set,
 	seqcount_rwlock_init(&priv->count, &priv->lock);
 	priv->root = RB_ROOT;
 
-	INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc);
-	if (set->flags & NFT_SET_TIMEOUT)
-		queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
-				   nft_set_gc_interval(set));
-
 	return 0;
 }
 
@@ -740,7 +651,6 @@ static void nft_rbtree_destroy(const struct nft_ctx *ctx,
 	struct nft_rbtree_elem *rbe;
 	struct rb_node *node;
 
-	cancel_delayed_work_sync(&priv->gc_work);
 	rcu_barrier();
 	while ((node = priv->root.rb_node) != NULL) {
 		rb_erase(node, &priv->root);
-- 
2.30.2