Add .release_ops, that is called in case of error at a later stage in the expression initialization path, ie. .select_ops() has been already set up operations and that needs to be undone. This allows us to follow a more simplistic approach, ie. place the match/target into the list from the .select_ops path. Otherwise, if there is already an extension in the list, bump the reference counter and re-use it. nft_xt_put() is always called from paths where we have guarantees that no packets are walking the expression ops. Therefore, there is no need for kfree_rcu() anymore. The atomic reference counter is still needed since the destroy path is called away from pernet mutex that protects the transaction. Signed-off-by: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx> --- Compile tested only, no function test but this is more or less what I think we should do here. I still need to resync nf-next to get this patch in, so pull request for nf-next will be coming soon. include/net/netfilter/nf_tables.h | 3 ++ net/netfilter/nf_tables_api.c | 7 +++- net/netfilter/nft_compat.c | 88 ++++++++++++++------------------------- 3 files changed, 40 insertions(+), 58 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b4984bbbe157..3b4d65ddf384 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -690,10 +690,12 @@ static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, gcb->elems[gcb->head.cnt++] = elem; } +struct nft_expr_ops; /** * struct nft_expr_type - nf_tables expression type * * @select_ops: function to select nft_expr_ops + * @release_ops: release nft_expr_ops * @ops: default ops, used when no select_ops functions is present * @list: used internally * @name: Identifier @@ -706,6 +708,7 @@ static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, struct nft_expr_type { const struct nft_expr_ops *(*select_ops)(const struct nft_ctx *, const struct nlattr * const tb[]); + void (*release_ops)(struct nft_expr_ops *ops); const struct nft_expr_ops *ops; struct list_head list; const char *name; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5a92f23f179f..eca1b957d815 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2126,6 +2126,7 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, { struct nft_expr_info info; struct nft_expr *expr; + struct module *owner; int err; err = nf_tables_expr_parse(ctx, nla, &info); @@ -2145,7 +2146,11 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, err3: kfree(expr); err2: - module_put(info.ops->type->owner); + owner = info.ops->type->owner; + if (info.ops->release_ops) + info.ops->release_ops(info.ops); + + module_put(owner); err1: return ERR_PTR(err); } diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index fe64df848365..678e2473c5df 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -28,16 +28,6 @@ struct nft_xt { struct list_head head; struct nft_expr_ops ops; refcount_t refcnt; - - /* used only when transaction mutex is locked */ - unsigned int listcnt; - - /* Unlike other expressions, ops doesn't have static storage duration. - * nft core assumes they do. We use kfree_rcu so that nft core can - * can check expr->ops->size even after nft_compat->destroy() frees - * the nft_xt struct that holds the ops structure. - */ - struct rcu_head rcu_head; }; /* Used for matches where *info is larger than X byte */ @@ -61,26 +51,12 @@ static struct nft_compat_net *nft_compat_pernet(struct net *net) return net_generic(net, nft_compat_net_id); } -static void nft_xt_get(struct nft_xt *xt) -{ - /* refcount_inc() warns on 0 -> 1 transition, but we can't - * init the reference count to 1 in .select_ops -- we can't - * undo such an increase when another expression inside the same - * rule fails afterwards. - */ - if (xt->listcnt == 0) - refcount_set(&xt->refcnt, 1); - else - refcount_inc(&xt->refcnt); - - xt->listcnt++; -} - static bool nft_xt_put(struct nft_xt *xt) { if (refcount_dec_and_test(&xt->refcnt)) { WARN_ON_ONCE(!list_empty(&xt->head)); - kfree_rcu(xt, rcu_head); + list_del(&xt->head); + kfree(xt); return true; } @@ -281,7 +257,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_target *target = expr->ops->data; struct xt_tgchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO])); - struct nft_xt *nft_xt; u16 proto = 0; bool inv = false; union nft_entry e = {}; @@ -305,8 +280,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (!target->target) return -EINVAL; - nft_xt = container_of(expr->ops, struct nft_xt, ops); - nft_xt_get(nft_xt); return 0; } @@ -498,7 +471,6 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_match *match = expr->ops->data; struct xt_mtchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); - struct nft_xt *nft_xt; u16 proto = 0; bool inv = false; union nft_entry e = {}; @@ -514,13 +486,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); - ret = xt_check_match(&par, size, proto, inv); - if (ret < 0) - return ret; - - nft_xt = container_of(expr->ops, struct nft_xt, ops); - nft_xt_get(nft_xt); - return 0; + return xt_check_match(&par, size, proto, inv); } static int @@ -573,18 +539,6 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); } -static void nft_compat_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - enum nft_trans_phase phase) -{ - struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); - - if (phase == NFT_TRANS_ABORT || phase == NFT_TRANS_COMMIT) { - if (--xt->listcnt == 0) - list_del_init(&xt->head); - } -} - static void nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -813,8 +767,10 @@ nft_match_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_match, &cn->nft_match_list, head) { struct xt_match *match = nft_match->ops.data; - if (nft_match_cmp(match, mt_name, rev, family)) + if (nft_match_cmp(match, mt_name, rev, family)) { + refcount_inc(&nft_match->refcnt); return &nft_match->ops; + } } match = xt_request_find_match(family, mt_name, rev); @@ -833,12 +789,11 @@ nft_match_select_ops(const struct nft_ctx *ctx, goto err; } - refcount_set(&nft_match->refcnt, 0); + refcount_set(&nft_match->refcnt, 1); nft_match->ops.type = &nft_match_type; nft_match->ops.eval = nft_match_eval; nft_match->ops.init = nft_match_init; nft_match->ops.destroy = nft_match_destroy; - nft_match->ops.deactivate = nft_compat_deactivate; nft_match->ops.dump = nft_match_dump; nft_match->ops.validate = nft_match_validate; nft_match->ops.data = match; @@ -855,7 +810,6 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.size = matchsize; - nft_match->listcnt = 0; list_add(&nft_match->head, &cn->nft_match_list); return &nft_match->ops; @@ -864,9 +818,19 @@ nft_match_select_ops(const struct nft_ctx *ctx, return ERR_PTR(err); } +static void nft_match_release_ops(struct nft_expr_ops *ops) +{ + struct nft_xt *xt = container_of(ops, struct nft_xt, ops); + struct xt_match *match = ops->data; + + if (nft_xt_put(xt)) + module_put(match->me); +} + static struct nft_expr_type nft_match_type __read_mostly = { .name = "match", .select_ops = nft_match_select_ops, + .release_ops = nft_match_release_ops, .policy = nft_match_policy, .maxattr = NFTA_MATCH_MAX, .owner = THIS_MODULE, @@ -912,8 +876,10 @@ nft_target_select_ops(const struct nft_ctx *ctx, if (!target->target) continue; - if (nft_target_cmp(target, tg_name, rev, family)) + if (nft_target_cmp(target, tg_name, rev, family)) { + refcount_inc(&nft_target->refcnt); return &nft_target->ops; + } } target = xt_request_find_target(family, tg_name, rev); @@ -937,12 +903,11 @@ nft_target_select_ops(const struct nft_ctx *ctx, goto err; } - refcount_set(&nft_target->refcnt, 0); + refcount_set(&nft_target->refcnt, 1); nft_target->ops.type = &nft_target_type; nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.init = nft_target_init; nft_target->ops.destroy = nft_target_destroy; - nft_target->ops.deactivate = nft_compat_deactivate; nft_target->ops.dump = nft_target_dump; nft_target->ops.validate = nft_target_validate; nft_target->ops.data = target; @@ -952,7 +917,6 @@ nft_target_select_ops(const struct nft_ctx *ctx, else nft_target->ops.eval = nft_target_eval_xt; - nft_target->listcnt = 0; list_add(&nft_target->head, &cn->nft_target_list); return &nft_target->ops; @@ -961,9 +925,19 @@ nft_target_select_ops(const struct nft_ctx *ctx, return ERR_PTR(err); } +static void nft_target_release_ops(struct nft_expr_ops *ops) +{ + struct nft_xt *xt = container_of(ops, struct nft_xt, ops); + struct xt_target *target = ops->data; + + if (nft_xt_put(xt)) + module_put(target->me); +} + static struct nft_expr_type nft_target_type __read_mostly = { .name = "target", .select_ops = nft_target_select_ops, + .release_ops = nft_target_release_ops, .policy = nft_target_policy, .maxattr = NFTA_TARGET_MAX, .owner = THIS_MODULE, -- 2.11.0