2018-07-03 19:20 GMT+09:00 Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx>: > On Sun, Jul 01, 2018 at 08:44:52PM +0900, Taehee Yoo wrote: >> This patch fixes below. >> 1. check null pointer of rb_next. >> rb_next can return null. so null check routine should be added. >> 2. check whether an interval flags is set or not. >> If interval flags is given, both a start node and a end node >> should be removed at once. If interval flags it not given, >> is doesn't matter. > Thank you for reviewing! > For #2, I would prefer we reject rbtree for single elements. I'm going > to send a patch for this. > > Would you rebase 1. and 3. on top? > > Thanks! > Of course! Do you mean that the 'top' is current top? or next top? Thanks! >> 3. add rcu_barrier in destroy routine. >> GC uses call_rcu to remove elements. but all elements should be >> removed before destroying set and chains. so that rcu_barrier is added. >> >> test script: >> %cat test.nft >> table inet aa { >> map map1 { >> type ipv4_addr : verdict; flags interval, timeout; >> elements = { >> 0-1 : jump a0, >> 3-4 : jump a0, >> 6-7 : jump a0, >> 9-10 : jump a0, >> 12-13 : jump a0, >> 15-16 : jump a0, >> 18-19 : jump a0, >> 21-22 : jump a0, >> 24-25 : jump a0, >> 27-28 : jump a0, >> } >> timeout 1s; >> } >> chain a0 { >> } >> } >> flush ruleset >> table inet aa { >> map map1 { >> type ipv4_addr : verdict; flags interval, timeout; >> elements = { >> 0-1 : jump a0, >> 3-4 : jump a0, >> 6-7 : jump a0, >> 9-10 : jump a0, >> 12-13 : jump a0, >> 15-16 : jump a0, >> 18-19 : jump a0, >> 21-22 : jump a0, >> 24-25 : jump a0, >> 27-28 : jump a0, >> } >> timeout 1s; >> } >> chain a0 { >> } >> } >> flush ruleset >> >> splat looks like: >> [ 2402.419838] kasan: GPF could be caused by NULL-ptr deref or user memory access >> [ 2402.428433] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI >> [ 2402.429343] CPU: 1 PID: 1350 Comm: kworker/1:1 Not tainted 4.18.0-rc2+ #1 >> [ 2402.429343] Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 03/23/2017 >> [ 2402.429343] Workqueue: events_power_efficient nft_rbtree_gc [nft_set_rbtree] >> [ 2402.429343] RIP: 0010:rb_next+0x1e/0x130 >> [ 2402.429343] Code: e9 de f2 ff ff 0f 1f 80 00 00 00 00 41 55 48 89 fa 41 54 55 53 48 c1 ea 03 48 b8 00 00 00 0 >> [ 2402.429343] RSP: 0018:ffff880105f77678 EFLAGS: 00010296 >> [ 2402.429343] RAX: dffffc0000000000 RBX: ffff8801143e3428 RCX: 1ffff1002287c69c >> [ 2402.429343] RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000000 >> [ 2402.429343] RBP: 0000000000000000 R08: ffffed0016aabc24 R09: ffffed0016aabc24 >> [ 2402.429343] R10: 0000000000000001 R11: ffffed0016aabc23 R12: 0000000000000000 >> [ 2402.429343] R13: ffff8800b6933388 R14: dffffc0000000000 R15: ffff8801143e3440 >> [ 2402.534486] kasan: CONFIG_KASAN_INLINE enabled >> [ 2402.534212] FS: 0000000000000000(0000) GS:ffff88011b600000(0000) knlGS:0000000000000000 >> [ 2402.534212] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 >> [ 2402.534212] CR2: 0000000000863008 CR3: 00000000a3c16000 CR4: 00000000001006e0 >> [ 2402.534212] Call Trace: >> [ 2402.534212] nft_rbtree_gc+0x2b5/0x5f0 [nft_set_rbtree] >> [ 2402.534212] process_one_work+0xc1b/0x1ee0 >> [ 2402.540329] kasan: GPF could be caused by NULL-ptr deref or user memory access >> [ 2402.534212] ? _raw_spin_unlock_irq+0x29/0x40 >> [ 2402.534212] ? pwq_dec_nr_in_flight+0x3e0/0x3e0 >> [ 2402.534212] ? set_load_weight+0x270/0x270 >> [ 2402.534212] ? __schedule+0x6ea/0x1fb0 >> [ 2402.534212] ? __sched_text_start+0x8/0x8 >> [ 2402.534212] ? save_trace+0x320/0x320 >> [ 2402.534212] ? sched_clock_local+0xe2/0x150 >> [ 2402.534212] ? find_held_lock+0x39/0x1c0 >> [ 2402.534212] ? worker_thread+0x35f/0x1150 >> [ 2402.534212] ? lock_contended+0xe90/0xe90 >> [ 2402.534212] ? __lock_acquire+0x4520/0x4520 >> [ 2402.534212] ? do_raw_spin_unlock+0xb1/0x350 >> [ 2402.534212] ? do_raw_spin_trylock+0x111/0x1b0 >> [ 2402.534212] ? do_raw_spin_lock+0x1f0/0x1f0 >> [ 2402.534212] worker_thread+0x169/0x1150 >> >> Fixes: 8d8540c4f5e0("netfilter: nft_set_rbtree: add timeout support") >> Signed-off-by: Taehee Yoo <ap420073@xxxxxxxxx> >> --- >> net/netfilter/nft_set_rbtree.c | 34 ++++++++++++++++++++++------------ >> 1 file changed, 22 insertions(+), 12 deletions(-) >> >> diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c >> index 7f3a9a2..1db52b0 100644 >> --- a/net/netfilter/nft_set_rbtree.c >> +++ b/net/netfilter/nft_set_rbtree.c >> @@ -356,27 +356,27 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, >> static void nft_rbtree_gc(struct work_struct *work) >> { >> struct nft_set_gc_batch *gcb = NULL; >> - struct rb_node *node, *prev = NULL; >> + struct rb_node *node; >> struct nft_rbtree_elem *rbe; >> struct nft_rbtree *priv; >> struct nft_set *set; >> int i; >> + bool interval = false; >> >> priv = container_of(work, struct nft_rbtree, gc_work.work); >> set = nft_set_container_of(priv); >> >> + if (set->flags & NFT_SET_INTERVAL) >> + interval = true; >> + >> write_lock_bh(&priv->lock); >> write_seqcount_begin(&priv->count); >> for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { >> rbe = rb_entry(node, struct nft_rbtree_elem, node); >> >> - if (nft_rbtree_interval_end(rbe)) { >> - prev = node; >> - continue; >> - } >> - if (!nft_set_elem_expired(&rbe->ext)) >> - continue; >> - if (nft_set_elem_mark_busy(&rbe->ext)) >> + if (!nft_set_elem_expired(&rbe->ext) || >> + nft_rbtree_interval_end(rbe) || >> + nft_set_elem_mark_busy(&rbe->ext)) >> continue; >> >> gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); >> @@ -386,12 +386,21 @@ static void nft_rbtree_gc(struct work_struct *work) >> atomic_dec(&set->nelems); >> nft_set_gc_batch_add(gcb, rbe); >> >> - if (prev) { >> - rbe = rb_entry(prev, struct nft_rbtree_elem, node); >> + if (interval) { >> + node = rb_next(node); >> + if (!node) >> + break; >> + rbe = rb_entry(node, struct nft_rbtree_elem, node); >> + >> + if (!nft_rbtree_interval_end(rbe)) { >> + node = rb_prev(node); >> + continue; >> + } >> + if (nft_set_elem_mark_busy(&rbe->ext)) >> + continue; >> atomic_dec(&set->nelems); >> nft_set_gc_batch_add(gcb, rbe); >> } >> - node = rb_next(node); >> } >> out: >> if (gcb) { >> @@ -440,9 +449,10 @@ static void nft_rbtree_destroy(const struct nft_set *set) >> struct rb_node *node; >> >> cancel_delayed_work_sync(&priv->gc_work); >> + rcu_barrier(); >> while ((node = priv->root.rb_node) != NULL) { >> - rb_erase(node, &priv->root); >> rbe = rb_entry(node, struct nft_rbtree_elem, node); >> + rb_erase(node, &priv->root); >> nft_set_elem_destroy(set, rbe, true); >> } >> } >> -- >> 2.9.3 >> -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html