Remove gc sequence counter, rely only on the DEAD lookup. Concurrent set removal is fine, we still hold a reference to the set structure. netns can't go away, the trans_gc structure increments the netns refcount. Signed-off-by: Florian Westphal <fw@xxxxxxxxx> --- include/net/netfilter/nf_tables.h | 13 ++------ net/netfilter/nf_tables_api.c | 55 +++++-------------------------- net/netfilter/nft_set_hash.c | 18 ++-------- net/netfilter/nft_set_pipapo.c | 2 +- net/netfilter/nft_set_rbtree.c | 4 +-- 5 files changed, 16 insertions(+), 76 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f0b85944e9eb..c8b8e2066f78 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -605,11 +605,6 @@ static inline void *nft_set_priv(const struct nft_set *set) return (void *)set->data; } -static inline bool nft_set_gc_is_pending(const struct nft_set *s) -{ - return refcount_read(&s->refs) != 1; -} - static inline struct nft_set *nft_set_container_of(const void *priv) { return (void *)priv - offsetof(struct nft_set, data); @@ -1744,18 +1739,15 @@ struct nft_trans_gc { struct list_head list; struct net *net; struct nft_set *set; - u32 seq; u16 count; struct nft_trans_gc_key keys[NFT_TRANS_GC_BATCHCOUNT]; struct rcu_head rcu; }; -struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, - unsigned int gc_seq, gfp_t gfp); +struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, gfp_t gfp); void nft_trans_gc_destroy(struct nft_trans_gc *trans); -struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, - unsigned int gc_seq, gfp_t gfp); +struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, gfp_t gfp); void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc); struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp); @@ -1797,7 +1789,6 @@ struct nftables_pernet { u64 table_handle; u64 tstamp; unsigned int base_seq; - unsigned int gc_seq; u8 validate_state; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8accb8498479..7a438392977c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9767,7 +9767,7 @@ static void nft_trans_gc_catchall(struct nft_ctx *ctx, struct nft_set *set) if (!nft_set_elem_expired(ext)) continue; - gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); + gc = nft_trans_gc_alloc(set, GFP_KERNEL); break; } @@ -9803,12 +9803,10 @@ static bool nft_trans_gc_work_done(struct nft_trans_gc *trans) mutex_lock(&nft_net->commit_mutex); - /* Check for race with transaction, otherwise this batch refers to - * stale objects that might not be there anymore. Skip transaction if - * set has been destroyed from control plane transaction in case gc - * worker loses race. + /* Check for race with transaction. + * Set could have been destroyed from control plane. */ - if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) { + if (trans->set->dead) { mutex_unlock(&nft_net->commit_mutex); return false; } @@ -9842,8 +9840,7 @@ static void nft_trans_gc_work(struct work_struct *work) } } -struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, - unsigned int gc_seq, gfp_t gfp) +struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, gfp_t gfp) { struct net *net = read_pnet(&set->net); struct nft_trans_gc *trans; @@ -9860,7 +9857,6 @@ struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, refcount_inc(&set->refs); trans->set = set; - trans->seq = gc_seq; return trans; } @@ -9896,8 +9892,7 @@ static void nft_trans_gc_queue_work(struct nft_trans_gc *trans) schedule_work(&trans_gc_work); } -struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, - unsigned int gc_seq, gfp_t gfp) +struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, gfp_t gfp) { struct nft_set *set; @@ -9907,7 +9902,7 @@ struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, set = gc->set; nft_trans_gc_queue_work(gc); - return nft_trans_gc_alloc(set, gc_seq, gfp); + return nft_trans_gc_alloc(set, gfp); } void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans) @@ -9933,7 +9928,7 @@ struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp) set = gc->set; call_rcu(&gc->rcu, nft_trans_gc_trans_free); - return nft_trans_gc_alloc(set, 0, gfp); + return nft_trans_gc_alloc(set, gfp); } void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) @@ -10116,31 +10111,15 @@ static void nft_set_commit_update(struct list_head *set_update_list) } } -static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net) -{ - unsigned int gc_seq; - - /* Bump gc counter, it becomes odd, this is the busy mark. */ - gc_seq = READ_ONCE(nft_net->gc_seq); - WRITE_ONCE(nft_net->gc_seq, ++gc_seq); - - return gc_seq; -} - -static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) -{ - WRITE_ONCE(nft_net->gc_seq, ++gc_seq); -} - static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; - unsigned int base_seq, gc_seq; LIST_HEAD(set_update_list); struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; + unsigned int base_seq; LIST_HEAD(adl); int err; @@ -10219,8 +10198,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) WRITE_ONCE(nft_net->base_seq, base_seq); - gc_seq = nft_gc_seq_begin(nft_net); - /* step 3. Start new generation, rules_gen_X now in use. */ net->nft.gencursor = nft_gencursor_next(net); @@ -10432,7 +10409,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_audit_log(&adl, nft_net->base_seq); - nft_gc_seq_end(nft_net, gc_seq); nft_net->validate_state = NFT_VALIDATE_SKIP; nf_tables_commit_release(net); @@ -10715,12 +10691,9 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action) { struct nftables_pernet *nft_net = nft_pernet(net); - unsigned int gc_seq; int ret; - gc_seq = nft_gc_seq_begin(nft_net); ret = __nf_tables_abort(net, action); - nft_gc_seq_end(nft_net, gc_seq); mutex_unlock(&nft_net->commit_mutex); return ret; @@ -11443,7 +11416,6 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, struct net *net = n->net; unsigned int deleted; bool restart = false; - unsigned int gc_seq; if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER) return NOTIFY_DONE; @@ -11452,8 +11424,6 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, deleted = 0; mutex_lock(&nft_net->commit_mutex); - gc_seq = nft_gc_seq_begin(nft_net); - if (!list_empty(&nf_tables_destroy_list)) nf_tables_trans_destroy_flush_work(); again: @@ -11480,7 +11450,6 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, if (restart) goto again; } - nft_gc_seq_end(nft_net, gc_seq); mutex_unlock(&nft_net->commit_mutex); @@ -11502,7 +11471,6 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->notify_list); mutex_init(&nft_net->commit_mutex); nft_net->base_seq = 1; - nft_net->gc_seq = 0; nft_net->validate_state = NFT_VALIDATE_SKIP; return 0; @@ -11520,20 +11488,15 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net) static void __net_exit nf_tables_exit_net(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); - unsigned int gc_seq; mutex_lock(&nft_net->commit_mutex); - gc_seq = nft_gc_seq_begin(nft_net); - if (!list_empty(&nft_net->commit_list) || !list_empty(&nft_net->module_list)) __nf_tables_abort(net, NFNL_ABORT_NONE); __nft_release_tables(net); - nft_gc_seq_end(nft_net, gc_seq); - mutex_unlock(&nft_net->commit_mutex); WARN_ON_ONCE(!list_empty(&nft_net->tables)); WARN_ON_ONCE(!list_empty(&nft_net->module_list)); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 4b3cc2e17784..c8210ed95c50 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -323,25 +323,18 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set, static void nft_rhash_gc(struct work_struct *work) { - struct nftables_pernet *nft_net; struct nft_set *set; struct nft_rhash_elem *he; struct nft_rhash *priv; struct rhashtable_iter hti; struct nft_trans_gc *gc; struct net *net; - u32 gc_seq; priv = container_of(work, struct nft_rhash, gc_work.work); set = nft_set_container_of(priv); net = read_pnet(&set->net); - nft_net = nft_pernet(net); - gc_seq = READ_ONCE(nft_net->gc_seq); - if (nft_set_gc_is_pending(set)) - goto done; - - gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + gc = nft_trans_gc_alloc(set, GFP_KERNEL); if (!gc) goto done; @@ -355,13 +348,6 @@ static void nft_rhash_gc(struct work_struct *work) goto try_later; } - /* Ruleset has been updated, try later. */ - if (READ_ONCE(nft_net->gc_seq) != gc_seq) { - nft_trans_gc_destroy(gc); - gc = NULL; - goto try_later; - } - if (nft_set_elem_is_dead(&he->ext)) goto dead_elem; @@ -374,7 +360,7 @@ static void nft_rhash_gc(struct work_struct *work) needs_gc_run: nft_set_elem_dead(&he->ext); dead_elem: - gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + gc = nft_trans_gc_queue_async(gc, GFP_ATOMIC); if (!gc) goto try_later; diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 35308de428c6..0b7022e6a03c 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1667,7 +1667,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) struct nft_pipapo_elem *e; struct nft_trans_gc *gc; - gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); + gc = nft_trans_gc_alloc(set, GFP_KERNEL); if (!gc) return; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index fc23fa76683a..ec03a6815790 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -242,7 +242,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, struct nft_rbtree_elem *rbe_prev; struct nft_trans_gc *gc; - gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC); + gc = nft_trans_gc_alloc(set, GFP_ATOMIC); if (!gc) return ERR_PTR(-ENOMEM); @@ -634,7 +634,7 @@ static void nft_rbtree_gc(struct nft_set *set) set = nft_set_container_of(priv); net = read_pnet(&set->net); - gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); + gc = nft_trans_gc_alloc(set, GFP_KERNEL); if (!gc) return; -- 2.43.0