From: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx> [ Upstream commit 628bd3e49cba1c066228e23d71a852c23e26da73 ] set .destroy callback releases the references to other objects in maps. This is very late and it results in spurious EBUSY errors. Drop refcount from the preparation phase instead, update set backend not to drop reference counter from set .destroy path. Exceptions: NFT_TRANS_PREPARE_ERROR does not require to drop the reference counter because the transaction abort path releases the map references for each element since the set is unbound. The abort path also deals with releasing reference counter for new elements added to unbound sets. Fixes: 591054469b3e ("netfilter: nf_tables: revisit chain/object refcounting from elements") Signed-off-by: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- include/net/netfilter/nf_tables.h | 5 +- net/netfilter/nf_tables_api.c | 89 ++++++++++++++++++++++++++++++++++---- net/netfilter/nft_set_bitmap.c | 5 +- net/netfilter/nft_set_hash.c | 23 +++++++-- net/netfilter/nft_set_rbtree.c | 5 +- 5 files changed, 108 insertions(+), 19 deletions(-) --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -349,7 +349,8 @@ struct nft_set_ops { int (*init)(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const nla[]); - void (*destroy)(const struct nft_set *set); + void (*destroy)(const struct nft_ctx *ctx, + const struct nft_set *set); void (*gc_init)(const struct nft_set *set); unsigned int elemsize; @@ -645,6 +646,8 @@ void *nft_set_elem_init(const struct nft u64 timeout, gfp_t gfp); void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr); +void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, void *elem); /** * struct nft_set_gc_batch_head - nf_tables set garbage collection batch --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -388,6 +388,31 @@ static int nft_trans_set_add(const struc return 0; } +static void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + +static int nft_mapelem_deactivate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_deactivate(ctx->net, set, elem); + + return 0; +} + +static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_deactivate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); +} + static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) { int err; @@ -396,6 +421,9 @@ static int nft_delset(const struct nft_c if (err < 0) return err; + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + nft_deactivate_next(ctx->net, set); nft_use_dec(&ctx->table->use); @@ -3741,7 +3769,7 @@ static int nf_tables_newset(struct net * return 0; err4: - ops->destroy(set); + ops->destroy(&ctx, set); err3: kfree(set->name); err2: @@ -3758,7 +3786,7 @@ static void nft_set_destroy(const struct if (WARN_ON(set->use > 0)) return; - set->ops->destroy(set); + set->ops->destroy(ctx, set); module_put(to_set_type(set->ops)->owner); kfree(set->name); kvfree(set); @@ -3883,10 +3911,39 @@ void nf_tables_unbind_set(const struct n } EXPORT_SYMBOL_GPL(nf_tables_unbind_set); +static void nft_setelem_data_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + +static int nft_mapelem_activate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_activate(ctx->net, set, elem); + + return 0; +} + +static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_activate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); +} + void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) { - if (nft_set_is_anonymous(set)) + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(ctx, set); + nft_clear(ctx->net, set); + } nft_use_inc_restore(&set->use); } @@ -3907,13 +3964,20 @@ void nf_tables_deactivate_set(const stru nft_use_dec(&set->use); break; case NFT_TRANS_PREPARE: - if (nft_set_is_anonymous(set)) - nft_deactivate_next(ctx->net, set); + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + nft_deactivate_next(ctx->net, set); + } nft_use_dec(&set->use); return; case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: + if (nft_set_is_anonymous(set) && + set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + nft_use_dec(&set->use); /* fall through */ default: @@ -4473,6 +4537,7 @@ void *nft_set_elem_init(const struct nft return elem; } +/* Drop references and destroy. Called from gc, dynset and abort path. */ void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr) { @@ -4501,11 +4566,11 @@ void nft_set_elem_destroy(const struct n } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); -/* Only called from commit path, nft_setelem_data_deactivate() already deals - * with the refcounting from the preparation phase. +/* Destroy element. References have been already dropped in the preparation + * path via nft_setelem_data_deactivate(). */ -static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, - const struct nft_set *set, void *elem) +void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, void *elem) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); @@ -4513,6 +4578,7 @@ static void nf_tables_set_elem_destroy(c nf_tables_expr_destroy(ctx, nft_set_ext_expr(ext)); kfree(elem); } +EXPORT_SYMBOL_GPL(nf_tables_set_elem_destroy); static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr, u32 nlmsg_flags) @@ -6940,6 +7006,8 @@ static int __nf_tables_abort(struct net case NFT_MSG_DELSET: nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_set(trans)); + if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(&trans->ctx, nft_trans_set(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: @@ -7604,6 +7672,9 @@ static void __nft_release_table(struct n list_for_each_entry_safe(set, ns, &table->sets, list) { list_del(&set->list); nft_use_dec(&table->use); + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(&ctx, set); + nft_set_destroy(&ctx, set); } list_for_each_entry_safe(obj, ne, &table->objects, list) { --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -273,13 +273,14 @@ static int nft_bitmap_init(const struct return 0; } -static void nft_bitmap_destroy(const struct nft_set *set) +static void nft_bitmap_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_bitmap *priv = nft_set_priv(set); struct nft_bitmap_elem *be, *n; list_for_each_entry_safe(be, n, &priv->list, head) - nft_set_elem_destroy(set, be, true); + nf_tables_set_elem_destroy(ctx, set, be); } static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -379,19 +379,31 @@ static int nft_rhash_init(const struct n return 0; } +struct nft_rhash_ctx { + const struct nft_ctx ctx; + const struct nft_set *set; +}; + static void nft_rhash_elem_destroy(void *ptr, void *arg) { - nft_set_elem_destroy(arg, ptr, true); + struct nft_rhash_ctx *rhash_ctx = arg; + + nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr); } -static void nft_rhash_destroy(const struct nft_set *set) +static void nft_rhash_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_ctx rhash_ctx = { + .ctx = *ctx, + .set = set, + }; cancel_delayed_work_sync(&priv->gc_work); rcu_barrier(); rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, - (void *)set); + (void *)&rhash_ctx); } /* Number of buckets is stored in u32, so cap our result to 1U<<31 */ @@ -629,7 +641,8 @@ static int nft_hash_init(const struct nf return 0; } -static void nft_hash_destroy(const struct nft_set *set) +static void nft_hash_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; @@ -639,7 +652,7 @@ static void nft_hash_destroy(const struc for (i = 0; i < priv->buckets; i++) { hlist_for_each_entry_safe(he, next, &priv->table[i], node) { hlist_del_rcu(&he->node); - nft_set_elem_destroy(set, he, true); + nf_tables_set_elem_destroy(ctx, set, he); } } } --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -466,7 +466,8 @@ static int nft_rbtree_init(const struct return 0; } -static void nft_rbtree_destroy(const struct nft_set *set) +static void nft_rbtree_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; @@ -477,7 +478,7 @@ static void nft_rbtree_destroy(const str while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_set_elem_destroy(set, rbe, true); + nf_tables_set_elem_destroy(ctx, set, rbe); } } Patches currently in stable-queue which might be from kroah.com@xxxxxxxxxxxxxxx are queue-4.19/netfilter-nf_tables-bogus-ebusy-when-deleting-flowtable-after-flush-for-4.19.patch queue-4.19/netfilter-nft_set_rbtree-switch-to-node-list-walk-for-overlap-detection.patch queue-4.19/netfilter-nf_tables-validate-nfproto_-family.patch queue-4.19/netfilter-nf_tables-unregister-flowtable-hooks-on-netns-exit.patch queue-4.19/netfilter-nf_tables-fix-gc-transaction-races-with-netns-and-netlink-event-exit-path.patch queue-4.19/netfilter-nft_set_rbtree-use-read-spinlock-to-avoid-datapath-contention.patch queue-4.19/netfilter-nft_dynset-report-eopnotsupp-on-missing-set-feature.patch queue-4.19/netfilter-nf_tables-discard-table-flag-update-with-pending-basechain-deletion.patch queue-4.19/netfilter-nft_set_rbtree-skip-sync-gc-for-new-elements-in-this-transaction.patch queue-4.19/netfilter-nf_tables-mark-set-as-dead-when-unbinding-anonymous-set-with-timeout.patch queue-4.19/netfilter-nf_tables-reject-new-basechain-after-table-flag-update.patch queue-4.19/netfilter-nf_tables-gc-transaction-race-with-netns-dismantle.patch queue-4.19/netfilter-nftables-update-table-flags-from-the-commit-phase.patch queue-4.19/netfilter-nf_tables-allow-nfproto_inet-in-nft_-match-target-_validate.patch queue-4.19/netfilter-nft_set_rbtree-skip-end-interval-element-from-gc.patch queue-4.19/netfilter-nf_tables-drop-map-element-references-from-preparation-phase.patch queue-4.19/netfilter-nf_tables-gc-transaction-race-with-abort-path.patch queue-4.19/netfilter-nf_tables-fix-memleak-when-more-than-255-elements-expired.patch queue-4.19/netfilter-nft_set_rbtree-add-missing-expired-checks.patch queue-4.19/netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch queue-4.19/netfilter-nft_dynset-relax-superfluous-check-on-set-updates.patch queue-4.19/netfilter-nf_tables-defer-gc-run-if-previous-batch-is-still-pending.patch queue-4.19/netfilter-nft_set_hash-try-later-when-gc-hits-eagain-on-iteration.patch queue-4.19/netfilter-nft_dynset-fix-timeouts-later-than-23-days.patch queue-4.19/netfilter-nf_tables-mark-newset-as-dead-on-transaction-abort.patch queue-4.19/netfilter-nf_tables-don-t-skip-expired-elements-during-walk.patch queue-4.19/netfilter-nftables-rename-set-element-data-activation-deactivation-functions.patch queue-4.19/netfilter-nf_tables-gc-transaction-api-to-avoid-race-with-control-plane.patch queue-4.19/netfilter-nf_tables-set-dormant-flag-on-hook-register-failure.patch queue-4.19/netfilter-nf_tables-remove-busy-mark-and-gc-batch-api.patch queue-4.19/netfilter-nf_tables-skip-dead-set-elements-in-netlink-dump.patch queue-4.19/netfilter-nf_tables-adapt-set-backend-to-use-gc-transaction-api.patch queue-4.19/netfilter-nft_set_rbtree-fix-overlap-expiration-walk.patch queue-4.19/netfilter-nf_tables-fix-table-flag-updates.patch queue-4.19/netfilter-nf_tables-do-not-compare-internal-table-flags-on-updates.patch queue-4.19/netfilter-nft_set_rbtree-allow-loose-matching-of-closing-element-in-interval.patch queue-4.19/netfilter-nftables-exthdr-fix-4-byte-stack-oob-write.patch queue-4.19/netfilter-nft_set_rbtree-fix-null-deref-on-element-insertion.patch queue-4.19/netfilter-nf_tables-pass-context-to-nft_set_destroy.patch queue-4.19/netfilter-nf_tables-disable-toggling-dormant-table-state-more-than-once.patch