Add .release_ops, that is called in case of error at a later stage in the expression initialization path, ie. .select_ops() has been already set up operations and that needs to be undone. This allows us to follow a more simplistic approach, ie. place the match/target into the list from the .select_ops path. Otherwise, if there is already an extension in the list, bump the reference counter and re-use it. First, use the .deactivate callback to remove the extension from the list, while the mutex is still held. Then, check for the reference counter from the destroy path. In case this is zero, then we have to release the nft_xt object. Therefore, there is no need for kfree_rcu() from the destroy path since no packets are walking the expression ops. So all list operations are guaranteed to happen under the mutex. The atomic reference counter is still needed since the destroy path is called away from pernet mutex that protects the transaction. Signed-off-by: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx> --- v2: Use deactivate hook, so all list operations occur while the mutex is held. BTW, if this approach works fine? Do we still need the atomic refcount? include/net/netfilter/nf_tables.h | 3 ++ net/netfilter/nf_tables_api.c | 7 ++- net/netfilter/nft_compat.c | 93 ++++++++++++++++++--------------------- 3 files changed, 53 insertions(+), 50 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b4984bbbe157..1abd3cc84863 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -690,10 +690,12 @@ static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, gcb->elems[gcb->head.cnt++] = elem; } +struct nft_expr_ops; /** * struct nft_expr_type - nf_tables expression type * * @select_ops: function to select nft_expr_ops + * @release_ops: release nft_expr_ops * @ops: default ops, used when no select_ops functions is present * @list: used internally * @name: Identifier @@ -706,6 +708,7 @@ static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, struct nft_expr_type { const struct nft_expr_ops *(*select_ops)(const struct nft_ctx *, const struct nlattr * const tb[]); + void (*release_ops)(const struct nft_expr_ops *ops); const struct nft_expr_ops *ops; struct list_head list; const char *name; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5a92f23f179f..928a22a71aa5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2126,6 +2126,7 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, { struct nft_expr_info info; struct nft_expr *expr; + struct module *owner; int err; err = nf_tables_expr_parse(ctx, nla, &info); @@ -2145,7 +2146,11 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, err3: kfree(expr); err2: - module_put(info.ops->type->owner); + owner = info.ops->type->owner; + if (info.ops->type->release_ops) + info.ops->type->release_ops(info.ops); + + module_put(owner); err1: return ERR_PTR(err); } diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index fe64df848365..1f906bfb2955 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -28,16 +28,6 @@ struct nft_xt { struct list_head head; struct nft_expr_ops ops; refcount_t refcnt; - - /* used only when transaction mutex is locked */ - unsigned int listcnt; - - /* Unlike other expressions, ops doesn't have static storage duration. - * nft core assumes they do. We use kfree_rcu so that nft core can - * can check expr->ops->size even after nft_compat->destroy() frees - * the nft_xt struct that holds the ops structure. - */ - struct rcu_head rcu_head; }; /* Used for matches where *info is larger than X byte */ @@ -61,26 +51,11 @@ static struct nft_compat_net *nft_compat_pernet(struct net *net) return net_generic(net, nft_compat_net_id); } -static void nft_xt_get(struct nft_xt *xt) -{ - /* refcount_inc() warns on 0 -> 1 transition, but we can't - * init the reference count to 1 in .select_ops -- we can't - * undo such an increase when another expression inside the same - * rule fails afterwards. - */ - if (xt->listcnt == 0) - refcount_set(&xt->refcnt, 1); - else - refcount_inc(&xt->refcnt); - - xt->listcnt++; -} - static bool nft_xt_put(struct nft_xt *xt) { if (refcount_dec_and_test(&xt->refcnt)) { WARN_ON_ONCE(!list_empty(&xt->head)); - kfree_rcu(xt, rcu_head); + list_del(&xt->head); return true; } @@ -281,7 +256,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_target *target = expr->ops->data; struct xt_tgchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO])); - struct nft_xt *nft_xt; u16 proto = 0; bool inv = false; union nft_entry e = {}; @@ -305,14 +279,13 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (!target->target) return -EINVAL; - nft_xt = container_of(expr->ops, struct nft_xt, ops); - nft_xt_get(nft_xt); return 0; } static void nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { + struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); struct xt_target *target = expr->ops->data; void *info = nft_expr_priv(expr); struct xt_tgdtor_param par; @@ -324,8 +297,10 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) if (par.target->destroy != NULL) par.target->destroy(&par); - if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) + if (refcount_read(&xt->refcnt) == 0) { module_put(target->me); + kfree(xt); + } } static int nft_extension_dump_info(struct sk_buff *skb, int attr, @@ -498,7 +473,6 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_match *match = expr->ops->data; struct xt_mtchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); - struct nft_xt *nft_xt; u16 proto = 0; bool inv = false; union nft_entry e = {}; @@ -514,13 +488,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); - ret = xt_check_match(&par, size, proto, inv); - if (ret < 0) - return ret; - - nft_xt = container_of(expr->ops, struct nft_xt, ops); - nft_xt_get(nft_xt); - return 0; + return xt_check_match(&par, size, proto, inv); } static int @@ -552,6 +520,7 @@ static void __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, void *info) { + struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); struct xt_match *match = expr->ops->data; struct module *me = match->me; struct xt_mtdtor_param par; @@ -563,8 +532,10 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, if (par.match->destroy != NULL) par.match->destroy(&par); - if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) + if (refcount_read(&xt->refcnt) == 0) { module_put(me); + kfree(xt); + } } static void @@ -579,10 +550,8 @@ static void nft_compat_deactivate(const struct nft_ctx *ctx, { struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); - if (phase == NFT_TRANS_ABORT || phase == NFT_TRANS_COMMIT) { - if (--xt->listcnt == 0) - list_del_init(&xt->head); - } + if (phase == NFT_TRANS_ABORT || phase == NFT_TRANS_COMMIT) + nft_xt_put(xt); } static void @@ -813,8 +782,10 @@ nft_match_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_match, &cn->nft_match_list, head) { struct xt_match *match = nft_match->ops.data; - if (nft_match_cmp(match, mt_name, rev, family)) + if (nft_match_cmp(match, mt_name, rev, family)) { + refcount_inc(&nft_match->refcnt); return &nft_match->ops; + } } match = xt_request_find_match(family, mt_name, rev); @@ -833,7 +804,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, goto err; } - refcount_set(&nft_match->refcnt, 0); + refcount_set(&nft_match->refcnt, 1); nft_match->ops.type = &nft_match_type; nft_match->ops.eval = nft_match_eval; nft_match->ops.init = nft_match_init; @@ -855,7 +826,6 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.size = matchsize; - nft_match->listcnt = 0; list_add(&nft_match->head, &cn->nft_match_list); return &nft_match->ops; @@ -864,9 +834,21 @@ nft_match_select_ops(const struct nft_ctx *ctx, return ERR_PTR(err); } +static void nft_match_release_ops(const struct nft_expr_ops *ops) +{ + struct nft_xt *xt = container_of(ops, struct nft_xt, ops); + struct xt_match *match = ops->data; + + if (nft_xt_put(xt)) { + module_put(match->me); + kfree(xt); + } +} + static struct nft_expr_type nft_match_type __read_mostly = { .name = "match", .select_ops = nft_match_select_ops, + .release_ops = nft_match_release_ops, .policy = nft_match_policy, .maxattr = NFTA_MATCH_MAX, .owner = THIS_MODULE, @@ -912,8 +894,10 @@ nft_target_select_ops(const struct nft_ctx *ctx, if (!target->target) continue; - if (nft_target_cmp(target, tg_name, rev, family)) + if (nft_target_cmp(target, tg_name, rev, family)) { + refcount_inc(&nft_target->refcnt); return &nft_target->ops; + } } target = xt_request_find_target(family, tg_name, rev); @@ -937,7 +921,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, goto err; } - refcount_set(&nft_target->refcnt, 0); + refcount_set(&nft_target->refcnt, 1); nft_target->ops.type = &nft_target_type; nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.init = nft_target_init; @@ -952,7 +936,6 @@ nft_target_select_ops(const struct nft_ctx *ctx, else nft_target->ops.eval = nft_target_eval_xt; - nft_target->listcnt = 0; list_add(&nft_target->head, &cn->nft_target_list); return &nft_target->ops; @@ -961,9 +944,21 @@ nft_target_select_ops(const struct nft_ctx *ctx, return ERR_PTR(err); } +static void nft_target_release_ops(const struct nft_expr_ops *ops) +{ + struct nft_xt *xt = container_of(ops, struct nft_xt, ops); + struct xt_target *target = ops->data; + + if (nft_xt_put(xt)) { + module_put(target->me); + kfree(xt); + } +} + static struct nft_expr_type nft_target_type __read_mostly = { .name = "target", .select_ops = nft_target_select_ops, + .release_ops = nft_target_release_ops, .policy = nft_target_policy, .maxattr = NFTA_TARGET_MAX, .owner = THIS_MODULE, -- 2.11.0