On Fri, 11 Jan 2019 at 06:59, Florian Westphal <fw@xxxxxxxxx> wrote: > Hi Florian! > Taehee Yoo <ap420073@xxxxxxxxx> wrote: > > index 83adabdec900..8f3114871d20 100644 > > --- a/net/netfilter/nft_compat.c > > +++ b/net/netfilter/nft_compat.c > > @@ -29,6 +29,7 @@ struct nft_xt { > > struct list_head head; > > struct nft_expr_ops ops; > > unsigned int refcnt; > > + bool use; > > > > /* Unlike other expressions, ops doesn't have static storage duration. > > * nft core assumes they do. We use kfree_rcu so that nft core can > > @@ -48,7 +49,7 @@ struct nft_xt_match_priv { > > static bool nft_xt_put(struct nft_xt *xt) > > { > > mutex_lock(&nft_xt_lock); > > - if (--xt->refcnt == 0) { > > + if (--xt->refcnt == 0 && !xt->use) { > > list_del(&xt->head); > > I don't doubt your findings, but I would prefer if we do not have to add > these kinds of conditionals here. > > The problem is that when nft_xt_put is called, all side > effects are supposed to have been completed. > > At this point, list_del() should not be allowed anymore, > it should have taken place while we were still under transaction > mutex. > > Do you see any issues with this alternate approach? > It adds deactivate hook to list_del, so after transaction mutex is > released, next 'add' will create a new compat expression rather > than trying to use the one that is scheduled for free. > Thank you for review and sorry for late reply. I have been testing your patch and I found a problem scenario. So, I have been trying to find the fundamental problem. As far as I know, transaction mutex is a per-net mutex. So, the problem occurred in per-net scenario. NET #0 NET #1 ->select_ops() ->init() ->select_ops() ->deactivate() ->destroy() nft_xt_put() kfree_rcu(xt, rcu_head); ->init() <-- use-after-free occurred. Test commands: SHELL #1 while : do iptables-nft -t nat -I POSTROUTING -m string --string ap --algo \ kmp -j MASQUERADE nft flush ruleset done SHELL #2 ip netns add vm1 ip netns exec vm1 bash while : do iptables-nft -t nat -I POSTROUTING -m string --string ap --algo \ kmp -j MASQUERADE nft flush ruleset done Results: [12090.543279] BUG: KASAN: use-after-free in nft_target_init+0xb96/0xbe0 [nft_compat] [12090.543279] Read of size 4 at addr ffff888111704f60 by task iptables-nft/7273 [12090.543279] CPU: 0 PID: 7273 Comm: iptables-nft Not tainted 4.20.0+ #59 [12090.543279] Call Trace: [12090.543279] dump_stack+0xc9/0x16b [12090.543279] ? show_regs_print_info+0x5/0x5 [12090.543279] ? kmsg_dump_rewind_nolock+0xd9/0xd9 [12090.543279] ? nft_target_init+0xb96/0xbe0 [nft_compat] [12090.543279] print_address_description+0x6a/0x270 [12090.543279] ? nft_target_init+0xb96/0xbe0 [nft_compat] [12090.543279] ? nft_target_init+0xb96/0xbe0 [nft_compat] [12090.543279] kasan_report+0x12a/0x16f [12090.543279] ? nft_target_init+0xb96/0xbe0 [nft_compat] [12090.543279] nft_target_init+0xb96/0xbe0 [nft_compat] [ ... ] [12090.543279] nf_tables_newrule+0x10a4/0x2570 [nf_tables] [ ... ] [12090.543279] Allocated by task 7273: [12090.543279] kasan_kmalloc+0xa5/0xd0 [12090.543279] kmem_cache_alloc_trace+0x117/0x290 [12090.543279] nft_target_select_ops+0x481/0x990 [nft_compat] [12090.543279] nf_tables_expr_parse+0x2a1/0x510 [nf_tables] [12090.543279] nf_tables_newrule+0xd0c/0x2570 [nf_tables] [12090.543279] nfnetlink_rcv_batch+0xd2e/0x1550 [nfnetlink] [12090.543279] nfnetlink_rcv+0x2ee/0x350 [nfnetlink] [12090.543279] netlink_unicast+0x414/0x5e0 [12090.543279] netlink_sendmsg+0x7b8/0xcf0 [12090.543279] ___sys_sendmsg+0x689/0x990 [12090.543279] __sys_sendmsg+0xd2/0x210 [12090.543279] do_syscall_64+0x138/0x560 [12090.543279] entry_SYSCALL_64_after_hwframe+0x49/0xbe [12090.543279] [12090.543279] Freed by task 1307: [12090.543279] __kasan_slab_free+0x135/0x180 [12090.543279] kfree+0xdb/0x280 [12090.543279] rcu_process_callbacks+0x947/0x24d0 [12090.543279] __do_softirq+0x2a5/0xa3b I agree with your idea that is to use a activate/deactivate callback. So, Could you fix your patch for this scenario? If my understanding is incorrect, please let me know. Thanks again! > diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c > --- a/net/netfilter/nft_compat.c > +++ b/net/netfilter/nft_compat.c > @@ -27,6 +27,7 @@ struct nft_xt { > struct list_head head; > struct nft_expr_ops ops; > unsigned int refcnt; > + unsigned int listcnt; > > /* Unlike other expressions, ops doesn't have static storage duration. > * nft core assumes they do. We use kfree_rcu so that nft core can > @@ -43,10 +44,15 @@ struct nft_xt_match_priv { > void *info; > }; > > +static LIST_HEAD(nft_match_list); > +static LIST_HEAD(nft_target_list); > +static struct nft_expr_type nft_match_type; > +static struct nft_expr_type nft_target_type; > + > static bool nft_xt_put(struct nft_xt *xt) > { > if (--xt->refcnt == 0) { > - list_del(&xt->head); > + WARN_ON_ONCE(!list_empty(&xt->head)); > kfree_rcu(xt, rcu_head); > return true; > } > @@ -540,6 +546,40 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) > __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); > } > > +static void nft_compat_activate(const struct nft_ctx *ctx, > + const struct nft_expr *expr, > + struct list_head *h) > +{ > + struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); > + > + if (xt->listcnt == 0) > + list_add(&xt->head, h); > + > + xt->listcnt++; > +} > + > +static void nft_compat_activate_mt(const struct nft_ctx *ctx, > + const struct nft_expr *expr) > +{ > + nft_compat_activate(ctx, expr, &nft_match_list); > +} > + > +static void nft_compat_activate_tg(const struct nft_ctx *ctx, > + const struct nft_expr *expr) > +{ > + nft_compat_activate(ctx, expr, &nft_target_list); > +} > + > +static void nft_compat_deactivate(const struct nft_ctx *ctx, > + const struct nft_expr *expr) > +{ > + struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); > + > + WARN_ON_ONCE(xt->refcnt == 0); > + if (--xt->listcnt == 0) > + list_del_init(&xt->head); > +} > + > static void > nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) > { > @@ -734,10 +774,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = { > .cb = nfnl_nft_compat_cb, > }; > > -static LIST_HEAD(nft_match_list); > - > -static struct nft_expr_type nft_match_type; > - > static bool nft_match_cmp(const struct xt_match *match, > const char *name, u32 rev, u32 family) > { > @@ -794,6 +830,8 @@ nft_match_select_ops(const struct nft_ctx *ctx, > nft_match->ops.eval = nft_match_eval; > nft_match->ops.init = nft_match_init; > nft_match->ops.destroy = nft_match_destroy; > + nft_match->ops.activate = nft_compat_activate_mt; > + nft_match->ops.deactivate = nft_compat_deactivate; > nft_match->ops.dump = nft_match_dump; > nft_match->ops.validate = nft_match_validate; > nft_match->ops.data = match; > @@ -810,6 +848,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, > > nft_match->ops.size = matchsize; > > + nft_match->listcnt = 1; > list_add(&nft_match->head, &nft_match_list); > > return &nft_match->ops; > @@ -826,10 +865,6 @@ static struct nft_expr_type nft_match_type __read_mostly = { > .owner = THIS_MODULE, > }; > > -static LIST_HEAD(nft_target_list); > - > -static struct nft_expr_type nft_target_type; > - > static bool nft_target_cmp(const struct xt_target *tg, > const char *name, u32 rev, u32 family) > { > @@ -898,6 +933,8 @@ nft_target_select_ops(const struct nft_ctx *ctx, > nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); > nft_target->ops.init = nft_target_init; > nft_target->ops.destroy = nft_target_destroy; > + nft_target->ops.activate = nft_compat_activate_tg; > + nft_target->ops.deactivate = nft_compat_deactivate; > nft_target->ops.dump = nft_target_dump; > nft_target->ops.validate = nft_target_validate; > nft_target->ops.data = target; > @@ -907,6 +944,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, > else > nft_target->ops.eval = nft_target_eval_xt; > > + nft_target->listcnt = 1; > list_add(&nft_target->head, &nft_target_list); > > return &nft_target->ops;