If userspace program exits while the queue its subscribed to has packets available we get following (harmless) RCU splat: net/netfilter/nfnetlink_queue.c:261 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by swapper/0/0: #0: (rcu_callback){....}-{0:0}, at: rcu_core #1: (&inst->lock){+.-.}-{3:3}, at: instance_destroy_rcu [..] Call Trace: lockdep_rcu_suspicious+0x1ab/0x250 nfqnl_reinject+0x5d3/0xfb0 instance_destroy_rcu+0x1b5/0x220 rcu_core+0xe32 [..] This is harmless because the incorrectly-obtained pointer will not be dereferenced in case nfqnl_reinject is called with NF_DROP verdict. Fix this by open-coding skb+entry release without going through nfqnl_reinject(). kfree_skb+release_ref is exactly what nfql_reinject ends up doing when called with NF_DROP, except that it also does a truckload of other things that are irrelevant for DROP. A similar warning can be triggered by flushing the ruleset while packets are being reinjected. This is harmless as well, the WARN_ON_ONCE() should be removed. Reported-by: Yi Chen <yiche@xxxxxxxxxx> Signed-off-by: Florian Westphal <fw@xxxxxxxxx> --- Due to MR cloed this patch is actually vs nf-next tree. It will also conflict with the pending sctp checksum patch from Antonio Ojea (nft_queue.sh), I can resend if needed once Antonios patch is applied (conflict resulution is simple: use both changes). net/netfilter/nfnetlink_queue.c | 12 ++++-- .../selftests/net/netfilter/nft_queue.sh | 37 +++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 00f4bd21c59b..812117d837a7 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -323,7 +323,7 @@ static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) hooks = nf_hook_entries_head(net, pf, entry->state.hook); i = entry->hook_index; - if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) { + if (!hooks || i >= hooks->num_hook_entries) { kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP); nf_queue_entry_free(entry); return; @@ -401,16 +401,22 @@ static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) { struct nf_queue_entry *entry, *next; + LIST_HEAD(head); spin_lock_bh(&queue->lock); list_for_each_entry_safe(entry, next, &queue->queue_list, list) { if (!cmpfn || cmpfn(entry, data)) { - list_del(&entry->list); + list_move(&entry->list, &head); queue->queue_total--; - nfqnl_reinject(entry, NF_DROP); } } spin_unlock_bh(&queue->lock); + + list_for_each_entry_safe(entry, next, &head, list) { + list_del(&entry->list); + kfree_skb_reason(entry->skb, SKB_DROP_REASON_NETFILTER_DROP); + nf_queue_entry_free(entry); + } } static int diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 8538f08c64c2..c61d23a8c88d 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -375,6 +375,42 @@ EOF wait 2>/dev/null } +test_queue_removal() +{ + read tainted_then < /proc/sys/kernel/tainted + + ip netns exec "$ns1" nft -f - <<EOF +flush ruleset +table ip filter { + chain output { + type filter hook output priority 0; policy accept; + ip protocol icmp queue num 0 + } +} +EOF + ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 -t "$timeout" & + local nfqpid=$! + + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0 + + ip netns exec "$ns1" ping -w 2 -f -c 10 127.0.0.1 -q >/dev/null + kill $nfqpid + + ip netns exec "$ns1" nft flush ruleset + + if [ "$tainted_then" -ne 0 ];then + return + fi + + read tainted_now < /proc/sys/kernel/tainted + if [ "$tainted_now" -eq 0 ];then + echo "PASS: queue program exiting while packets queued" + else + echo "TAINT: queue program exiting while packets queued" + ret=1 + fi +} + ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null @@ -413,5 +449,6 @@ test_tcp_localhost test_tcp_localhost_connectclose test_tcp_localhost_requeue test_icmp_vrf +test_queue_removal exit $ret -- 2.43.2