On Wed, Dec 26, 2018 at 02:41:59PM +0100, Pablo Neira Ayuso wrote:
> Instead of removing an empty list node that might be reintroduced soon
> thereafter, tentatively place the empty list node in the garbage
> collector, then re-check if the list is empty again before deleting it.
>
> This patch is aiming to simplify the garbage collection interaction
> between the packet path and the workqueue to delete empty lists.

Hm, still not good enough. Workqueue and packet path may race to place
the same node in the gc_nodes[] array, leading to possible
use-after-free; see the interleaving sketched below the quoted patch.

> Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
> Signed-off-by: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx>
> ---
> @Shawn: This is an alternative proposal to your patch 1/3 and 2/3.
>
>  include/net/netfilter/nf_conntrack_count.h |  2 -
>  net/netfilter/nf_conncount.c               | 69 +++++++++---------------------
>  2 files changed, 20 insertions(+), 51 deletions(-)
>
> diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
> index 4b2b2baf8ab4..66e7bf9006e1 100644
> --- a/include/net/netfilter/nf_conntrack_count.h
> +++ b/include/net/netfilter/nf_conntrack_count.h
> @@ -8,14 +8,12 @@ struct nf_conncount_data;
>  enum nf_conncount_list_add {
>  	NF_CONNCOUNT_ADDED,	/* list add was ok */
>  	NF_CONNCOUNT_ERR,	/* -ENOMEM, must drop skb */
> -	NF_CONNCOUNT_SKIP,	/* list is already reclaimed by gc */
>  };
>
>  struct nf_conncount_list {
>  	spinlock_t list_lock;
>  	struct list_head head;	/* connections with the same filtering key */
>  	unsigned int count;	/* length of list */
> -	bool dead;
>  };
>
>  struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
> diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
> index 9cd180bda092..7180877ff942 100644
> --- a/net/netfilter/nf_conncount.c
> +++ b/net/netfilter/nf_conncount.c
> @@ -109,11 +109,6 @@ nf_conncount_add(struct nf_conncount_list *list,
>  	conn->jiffies32 = (u32)jiffies;
>  	conn->dead = false;
>  	spin_lock_bh(&list->list_lock);
> -	if (list->dead == true) {
> -		kmem_cache_free(conncount_conn_cachep, conn);
> -		spin_unlock_bh(&list->list_lock);
> -		return NF_CONNCOUNT_SKIP;
> -	}
>  	list_add_tail(&conn->node, &list->head);
>  	list->count++;
>  	spin_unlock_bh(&list->list_lock);
> @@ -129,34 +124,27 @@ static void __conn_free(struct rcu_head *h)
>  	kmem_cache_free(conncount_conn_cachep, conn);
>  }
>
> -static bool conn_free(struct nf_conncount_list *list,
> +static void conn_free(struct nf_conncount_list *list,
>  		      struct nf_conncount_tuple *conn)
>  {
> -	bool free_entry = false;
> -
>  	spin_lock_bh(&list->list_lock);
>
>  	if (conn->dead) {
>  		spin_unlock_bh(&list->list_lock);
> -		return free_entry;
> +		return;
>  	}
>
>  	list->count--;
>  	conn->dead = true;
>  	list_del_rcu(&conn->node);
> -	if (list->count == 0) {
> -		list->dead = true;
> -		free_entry = true;
> -	}
>
>  	spin_unlock_bh(&list->list_lock);
>  	call_rcu(&conn->rcu_head, __conn_free);
> -	return free_entry;
>  }
>
>  static const struct nf_conntrack_tuple_hash *
>  find_or_evict(struct net *net, struct nf_conncount_list *list,
> -	      struct nf_conncount_tuple *conn, bool *free_entry)
> +	      struct nf_conncount_tuple *conn)
>  {
>  	const struct nf_conntrack_tuple_hash *found;
>  	unsigned long a, b;
> @@ -176,7 +164,7 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
>  	 */
>  	age = a - b;
>  	if (conn->cpu == cpu || age >= 2) {
> -		*free_entry = conn_free(list, conn);
> +		conn_free(list, conn);
>  		return ERR_PTR(-ENOENT);
>  	}
>
> @@ -193,7 +181,6 @@ void nf_conncount_lookup(struct net *net,
>  	struct nf_conncount_tuple *conn, *conn_n;
>  	struct nf_conn *found_ct;
>  	unsigned int collect = 0;
> -	bool free_entry = false;
>
>  	/* best effort only */
>  	*addit = tuple ? true : false;
> @@ -203,7 +190,7 @@ void nf_conncount_lookup(struct net *net,
>  		if (collect > CONNCOUNT_GC_MAX_NODES)
>  			break;
>
> -		found = find_or_evict(net, list, conn, &free_entry);
> +		found = find_or_evict(net, list, conn);
>  		if (IS_ERR(found)) {
>  			/* Not found, but might be about to be confirmed */
>  			if (PTR_ERR(found) == -EAGAIN) {
> @@ -251,7 +238,6 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
>  	spin_lock_init(&list->list_lock);
>  	INIT_LIST_HEAD(&list->head);
>  	list->count = 0;
> -	list->dead = false;
>  }
>  EXPORT_SYMBOL_GPL(nf_conncount_list_init);
>
> @@ -263,17 +249,14 @@ bool nf_conncount_gc_list(struct net *net,
>  	struct nf_conncount_tuple *conn, *conn_n;
>  	struct nf_conn *found_ct;
>  	unsigned int collected = 0;
> -	bool free_entry = false;
>  	bool ret = false;
>
>  	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
> -		found = find_or_evict(net, list, conn, &free_entry);
> +		found = find_or_evict(net, list, conn);
>  		if (IS_ERR(found)) {
> -			if (PTR_ERR(found) == -ENOENT) {
> -				if (free_entry)
> -					return true;
> +			if (PTR_ERR(found) == -ENOENT)
>  				collected++;
> -			}
> +
>  			continue;
>  		}
>
> @@ -284,8 +267,7 @@ bool nf_conncount_gc_list(struct net *net,
>  			 * closed already -> ditch it
>  			 */
>  			nf_ct_put(found_ct);
> -			if (conn_free(list, conn))
> -				return true;
> +			conn_free(list, conn);
>  			collected++;
>  			continue;
>  		}
> @@ -296,10 +278,8 @@ bool nf_conncount_gc_list(struct net *net,
>  	}
>
>  	spin_lock_bh(&list->list_lock);
> -	if (!list->count) {
> -		list->dead = true;
> +	if (!list->count)
>  		ret = true;
> -	}
>  	spin_unlock_bh(&list->list_lock);
>
>  	return ret;
> @@ -314,17 +294,19 @@ static void __tree_nodes_free(struct rcu_head *h)
>  	kmem_cache_free(conncount_rb_cachep, rbconn);
>  }
>
> -static void tree_nodes_free(struct rb_root *root,
> -			    struct nf_conncount_rb *gc_nodes[],
> -			    unsigned int gc_count)
> +static void gc_tree_run(struct rb_root *root,
> +			struct nf_conncount_rb *gc_nodes[],
> +			unsigned int gc_count)
>  {
>  	struct nf_conncount_rb *rbconn;
>
>  	while (gc_count) {
>  		rbconn = gc_nodes[--gc_count];
>  		spin_lock(&rbconn->list.list_lock);
> -		rb_erase(&rbconn->node, root);
> -		call_rcu(&rbconn->rcu_head, __tree_nodes_free);
> +		if (rbconn->list.count == 0) {
> +			rb_erase(&rbconn->node, root);
> +			call_rcu(&rbconn->rcu_head, __tree_nodes_free);
> +		}
>  		spin_unlock(&rbconn->list.list_lock);
>  	}
>  }
> @@ -375,11 +357,6 @@ insert_tree(struct net *net,
>  			count = 0; /* hotdrop */
>  		} else if (ret == NF_CONNCOUNT_ADDED) {
>  			count = rbconn->list.count;
> -		} else {
> -			/* NF_CONNCOUNT_SKIP, rbconn is already
> -			 * reclaimed by gc, insert a new tree node
> -			 */
> -			node_found = false;
>  		}
>  		break;
>  	}
> @@ -392,7 +369,7 @@ insert_tree(struct net *net,
>  	}
>
>  	if (gc_count) {
> -		tree_nodes_free(root, gc_nodes, gc_count);
> +		gc_tree_run(root, gc_nodes, gc_count);
>  		/* tree_node_free before new allocation permits
>  		 * allocator to re-use newly free'd object.
>  		 *
> @@ -476,11 +453,6 @@ count_tree(struct net *net,
>  			return 0; /* hotdrop */
>  		} else if (ret == NF_CONNCOUNT_ADDED) {
>  			return rbconn->list.count;
> -		} else {
> -			/* NF_CONNCOUNT_SKIP, rbconn is already
> -			 * reclaimed by gc, insert a new tree node
> -			 */
> -			break;
>  		}
>  	}
>  }
> @@ -512,9 +484,8 @@ static void tree_gc_worker(struct work_struct *work)
>
>  	spin_lock_bh(&nf_conncount_locks[tree]);
>
> -	if (gc_count) {
> -		tree_nodes_free(root, gc_nodes, gc_count);
> -	}
> +	if (gc_count)
> +		gc_tree_run(root, gc_nodes, gc_count);
>
>  	clear_bit(tree, data->pending_trees);
>
> --
> 2.11.0
>
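To make the race concrete: tree_gc_worker() collects its candidates
into its on-stack gc_nodes[] under rcu_read_lock() only, before it
takes the per-tree lock, while insert_tree() collects and erases under
that lock. So both can end up holding the same empty rbconn:

	workqueue (tree_gc_worker)      packet path (insert_tree)
	--------------------------      -------------------------
	rcu walk sees list.count == 0,
	stashes rbconn in gc_nodes[]
	                                takes per-tree lock, stashes
	                                the same empty rbconn
	                                gc_tree_run():
	                                  rb_erase(&rbconn->node, root)
	                                  call_rcu(..., __tree_nodes_free)
	                                releases per-tree lock
	takes per-tree lock
	gc_tree_run():
	  list.count is still 0, so the
	  re-check passes: rb_erase() on
	  a node no longer in the tree,
	  and the same rcu_head gets
	  queued a second time

The count re-check in gc_tree_run() can't catch this, because the
count stays zero after the first erase.

One way out might be to make the collection itself exclusive, i.e. let
whoever stashes the node claim it under list_lock first. Completely
untested sketch, and note the gc_collected field is made up for
illustration, it does not exist in struct nf_conncount_rb today:

	/* Claim an empty node for garbage collection; only the caller
	 * that wins the claim may place the node in its gc_nodes[]
	 * array, so the erase cannot happen twice.
	 */
	static bool gc_claim_node(struct nf_conncount_rb *rbconn)
	{
		bool claimed = false;

		spin_lock(&rbconn->list.list_lock);
		if (rbconn->list.count == 0 && !rbconn->gc_collected) {
			rbconn->gc_collected = true;	/* hypothetical field */
			claimed = true;
		}
		spin_unlock(&rbconn->list.list_lock);

		return claimed;
	}

Both collection sites would then only do
"gc_nodes[gc_count++] = rbconn;" when gc_claim_node() returns true,
and gc_tree_run() would clear gc_collected instead of erasing when it
finds the list has been repopulated in the meantime.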