Re: [PATCH nf 3/3] netfilter: nf_conncount: speculative garbage collection on empty lists

On Wed, Dec 26, 2018 at 11:08:43PM +0100, Pablo Neira Ayuso wrote:
> Instead of removing an empty list node that might be reintroduced soon
> thereafter, tentatively place the empty list node in the garbage
> collector, then re-check if the list is empty again before deleting it.
> 
> Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
> Signed-off-by: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx>
> ---
>  include/net/netfilter/nf_conntrack_count.h |  2 -
>  net/netfilter/nf_conncount.c               | 62 ++++++++----------------------
>  2 files changed, 16 insertions(+), 48 deletions(-)
> 
> diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
> index 4b2b2baf8ab4..66e7bf9006e1 100644
> --- a/include/net/netfilter/nf_conntrack_count.h
> +++ b/include/net/netfilter/nf_conntrack_count.h
> @@ -8,14 +8,12 @@ struct nf_conncount_data;
>  enum nf_conncount_list_add {
>  	NF_CONNCOUNT_ADDED, 	/* list add was ok */
>  	NF_CONNCOUNT_ERR,	/* -ENOMEM, must drop skb */
> -	NF_CONNCOUNT_SKIP,	/* list is already reclaimed by gc */
>  };
>  
>  struct nf_conncount_list {
>  	spinlock_t list_lock;
>  	struct list_head head;	/* connections with the same filtering key */
>  	unsigned int count;	/* length of list */
> -	bool dead;
>  };
>  
>  struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
> diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
> index 68788b979c1d..b33df77bd7a3 100644
> --- a/net/netfilter/nf_conncount.c
> +++ b/net/netfilter/nf_conncount.c
> @@ -103,11 +103,6 @@ nf_conncount_add(struct nf_conncount_list *list,
>  	conn->cpu = raw_smp_processor_id();
>  	conn->jiffies32 = (u32)jiffies;
>  	spin_lock_bh(&list->list_lock);
> -	if (list->dead == true) {
> -		kmem_cache_free(conncount_conn_cachep, conn);
> -		spin_unlock_bh(&list->list_lock);
> -		return NF_CONNCOUNT_SKIP;
> -	}
>  	list_add_tail(&conn->node, &list->head);
>  	list->count++;
>  	spin_unlock_bh(&list->list_lock);
> @@ -123,25 +118,17 @@ static void __conn_free(struct rcu_head *h)
>  	kmem_cache_free(conncount_conn_cachep, conn);
>  }
>  
> -static bool conn_free(struct nf_conncount_list *list,
> +static void conn_free(struct nf_conncount_list *list,
>  		      struct nf_conncount_tuple *conn)
>  {
> -	bool free_entry = false;
> -
>  	list->count--;
>  	list_del_rcu(&conn->node);
> -	if (list->count == 0) {
> -		list->dead = true;
> -		free_entry = true;
> -	}
> -
>  	call_rcu(&conn->rcu_head, __conn_free);
> -	return free_entry;
>  }
>  
>  static const struct nf_conntrack_tuple_hash *
>  find_or_evict(struct net *net, struct nf_conncount_list *list,
> -	      struct nf_conncount_tuple *conn, bool *free_entry)
> +	      struct nf_conncount_tuple *conn)
>  {
>  	const struct nf_conntrack_tuple_hash *found;
>  	unsigned long a, b;
> @@ -161,7 +148,7 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
>  	 */
>  	age = a - b;
>  	if (conn->cpu == cpu || age >= 2) {
> -		*free_entry = conn_free(list, conn);
> +		conn_free(list, conn);
>  		return ERR_PTR(-ENOENT);
>  	}
>  
> @@ -178,7 +165,6 @@ void nf_conncount_lookup(struct net *net,
>  	struct nf_conncount_tuple *conn, *conn_n;
>  	struct nf_conn *found_ct;
>  	unsigned int collect = 0;
> -	bool free_entry = false;
>  
>  	/* best effort only */
>  	*addit = tuple ? true : false;
> @@ -189,7 +175,7 @@ void nf_conncount_lookup(struct net *net,
>  		if (collect > CONNCOUNT_GC_MAX_NODES)
>  			break;
>  
> -		found = find_or_evict(net, list, conn, &free_entry);
> +		found = find_or_evict(net, list, conn);
>  		if (IS_ERR(found)) {
>  			/* Not found, but might be about to be confirmed */
>  			if (PTR_ERR(found) == -EAGAIN) {
> @@ -238,7 +224,6 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
>  	spin_lock_init(&list->list_lock);
>  	INIT_LIST_HEAD(&list->head);
>  	list->count = 0;
> -	list->dead = false;
>  }
>  EXPORT_SYMBOL_GPL(nf_conncount_list_init);
>  
> @@ -250,20 +235,15 @@ bool nf_conncount_gc_list(struct net *net,
>  	struct nf_conncount_tuple *conn, *conn_n;
>  	struct nf_conn *found_ct;
>  	unsigned int collected = 0;
> -	bool free_entry = false;
>  	bool ret = false;
>  
>  	spin_lock(&list->list_lock);
>  	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
> -		found = find_or_evict(net, list, conn, &free_entry);
> +		found = find_or_evict(net, list, conn);
>  		if (IS_ERR(found)) {
> -			if (PTR_ERR(found) == -ENOENT)  {
> -				if (free_entry) {
> -					spin_unlock(&list->list_lock);
> -					return true;
> -				}
> +			if (PTR_ERR(found) == -ENOENT)
>  				collected++;
> -			}
> +
>  			continue;
>  		}
>  
> @@ -274,10 +254,7 @@ bool nf_conncount_gc_list(struct net *net,
>  			 * closed already -> ditch it
>  			 */
>  			nf_ct_put(found_ct);
> -			if (conn_free(list, conn)) {
> -				spin_unlock(&list->list_lock);
> -				return true;
> -			}
> +			conn_free(list, conn);
>  			collected++;
>  			continue;
>  		}
> @@ -287,10 +264,8 @@ bool nf_conncount_gc_list(struct net *net,
>  			break;
>  	}
>  
> -	if (!list->count) {
> -		list->dead = true;
> +	if (!list->count)
>  		ret = true;
> -	}
>  	spin_unlock(&list->list_lock);
>  
>  	return ret;
> @@ -310,13 +285,18 @@ static void tree_nodes_free(struct rb_root *root,
>  			    unsigned int gc_count)
>  {
>  	struct nf_conncount_rb *rbconn;
> +	bool release = false;
>  
>  	while (gc_count) {
>  		rbconn = gc_nodes[--gc_count];
>  		spin_lock(&rbconn->list.list_lock);
> -		rb_erase(&rbconn->node, root);
> -		call_rcu(&rbconn->rcu_head, __tree_nodes_free);
> +		if (!rbconn->list.count) {
> +			release = true;
> +			rb_erase(&rbconn->node, root);
> +		}
>  		spin_unlock(&rbconn->list.list_lock);
> +		if (release)
> +			call_rcu(&rbconn->rcu_head, __tree_nodes_free);
>  	}
>  }
>  
> @@ -360,11 +340,6 @@ insert_tree(struct net *net,
>  				count = 0; /* hotdrop */
>  			} else if (ret == NF_CONNCOUNT_ADDED) {
>  				count = rbconn->list.count;
> -			} else {
> -				/* NF_CONNCOUNT_SKIP, rbconn is already
> -				 * reclaimed by gc, insert a new tree node
> -				 */
> -				node_found = false;
>  			}
>  			break;
>  		}
> @@ -451,11 +426,6 @@ count_tree(struct net *net,
>  				return 0; /* hotdrop */
>  			} else if (ret == NF_CONNCOUNT_ADDED) {
>  				return rbconn->list.count;
> -			} else {
> -				/* NF_CONNCOUNT_SKIP, rbconn is already
> -				 * reclaimed by gc, insert a new tree node
> -				 */
> -				break;
>  			}
>  		}
>  	}

The packet path can run concurrently on multiple CPUs, correct?  If
so, there is a race where an empty node is about to be freed by
tree_nodes_free(), which holds rbconn->list.list_lock, while
nf_conncount_add() inside count_tree() is waiting to acquire that
lock.  Once tree_nodes_free() releases the lock, nf_conncount_add()
will add the connection to the list, but by then it is too late: the
node has already been rb_erased from the tree.
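
Roughly the interleaving I have in mind, as a sketch (the CPU split
and call paths are just for illustration):

    CPU0 (gc or insert path)               CPU1 (count_tree, RCU only)
    ------------------------               ---------------------------
    nf_conncount_gc_list() empties         rbconn found in the rbtree
    the list, rbconn queued in
    gc_nodes[]
    tree_nodes_free():
      spin_lock(&rbconn->list.list_lock)
                                           nf_conncount_add():
                                             spin_lock(&list->list_lock)
                                             ... spins on CPU0's lock
      list.count == 0 -> rb_erase()
      spin_unlock(&rbconn->list.list_lock)
      call_rcu(__tree_nodes_free)
                                             list_add_tail(), list->count++
                                             -> entry added to a node that
                                                was already erased from
                                                the tree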

I think count_tree() needs to break here when addit is true.  Then
insert_tree() will handle calling nf_conncount_add() while holding
both rbconn->list.list_lock and nf_conncount_locks.
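
Something like this in count_tree(), i.e. an untested sketch just to
illustrate the idea; insert_tree() already re-walks the tree under
nf_conncount_locks, so it can either re-find the node and add to it
safely or insert a fresh node:

		} else {
			/* same source network -> be counted! */
			nf_conncount_lookup(net, &rbconn->list, tuple,
					    zone, &addit);

			if (!addit)
				return rbconn->list.count;

			/* Don't add under RCU only: tree_nodes_free()
			 * may rb_erase() this node under us.  Fall
			 * through to insert_tree(), which re-finds or
			 * re-inserts the node under nf_conncount_locks.
			 */
			break;
		}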

--
Shawn


