A conntrack, which is added via ctnetlink, can provoke a race condition

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello All,

When a conntrack is created  by kernel, it is initialized (sets
IPS_{DST,SRC}_NAT_DONE_BIT bits in nf_nat_setup_info) and only then it
is added in hashes (__nf_conntrack_hash_insert), so one conntract
can't be initialized from a few threads concurrently.

ctnetlink can add an uninitialized conntrack (w/o
IPS_{DST,SRC}_NAT_DONE_BIT) in hashes, then a few threads can look up
this conntrack and start initialize it concurrently. It's dangerous,
because BUG can be triggered from  nf_nat_setup_info.

I added a busy loop in nf_nat_setup_info before BUG_ON to increase
race window and the bug is triggered without any problem. This kernel
patch is attached to this message. And I attached a test script for
sending packets and an user-space program to add conntrack via
netlink.

[ 1307.098595] ctnetlink_create_conntrack:1723: ffff8800a527bdf0
[ 1318.374303] nf_nat_setup_info:385 ffff8800a527bdf0 1 88
[ 1318.402729] nf_nat_setup_info:385 ffff8800a527bdf0 1 88
[ 1322.290041] nf_nat_setup_info:388 ffff8800a527bdf0 1 88
[ 1322.298476] nf_nat_setup_info:388 ffff8800a527bdf0 1 38a
[ 1322.299851] ------------[ cut here ]------------
[ 1322.300800] kernel BUG at net/netfilter/nf_nat_core.c:390!
[ 1322.300800] invalid opcode: 0000 [#1] SMP
[ 1322.300800] Dumping ftrace buffer:
[ 1322.300800]    (ftrace buffer empty)
[ 1322.300800] Modules linked in: binfmt_misc nf_conntrack_netlink
nfnetlink xt_nat iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4
nf_nat_ipv4 nf_nat nf_conntrack nfsv3 nfs_acl nfs lockd sunrpc fscache
veth ip6table_filter ip6_tables iptable_filter ip_tables pcspkr
virtio_balloon i2c_piix4 virtio_net i2c_core virtio_blk floppy [last
unloaded: nf_conntrack]
[ 1322.300800] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.13.0-rc7+ #78
[ 1322.300800] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 1322.300800] task: ffff8800bb168290 ti: ffff8800bb16c000 task.ti:
ffff8800bb16c000
[ 1322.300800] RIP: 0010:[<ffffffffa00692b2>]  [<ffffffffa00692b2>]
nf_nat_setup_info+0x392/0x400 [nf_nat]
[ 1322.300800] RSP: 0018:ffff8800bfa83bb8  EFLAGS: 00010206
[ 1322.300800] RAX: 0000000000000100 RBX: ffff8800a527bdf0 RCX: 0000000000000000
[ 1322.300800] RDX: 0000000000000100 RSI: ffff8800bb1689b8 RDI: 0000000000000246
[ 1322.300800] RBP: ffff8800bfa83c58 R08: 0000000000000000 R09: 0000000000000000
[ 1322.300800] R10: 0000000000000001 R11: ffff8800bfa838e6 R12: 0000000000000001
[ 1322.300800] R13: ffff880037ae4040 R14: ffff8800bfa83c78 R15: 0000000000000000
[ 1322.300800] FS:  0000000000000000(0000) GS:ffff8800bfa80000(0000)
knlGS:0000000000000000
[ 1322.300800] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 1322.300800] CR2: 00007ffb9b71e000 CR3: 00000000b92d5000 CR4: 00000000000006e0
[ 1322.300800] Stack:
[ 1322.300800]  ffffffffa00440b8 ffff8800b992fd80 ffff8800a249ab80
ffff8800b992fd80
[ 1322.300800]  ffff8800b0d06400 00000000a0087c85 ffffe8ffffc80574
0000000100000001
[ 1322.300800]  ffffffffa0046598 ffff8800bb304480 0000000000000002
ffff8800b9664000
[ 1322.300800] Call Trace:
[ 1322.300800]  <IRQ>
[ 1322.300800]  [<ffffffffa00440b8>] ? ipt_do_table+0x2e8/0x4a5 [ip_tables]
[ 1322.300800]  [<ffffffffa007d2d2>] nf_nat_ipv4_fn+0x2d2/0x330 [iptable_nat]
[ 1322.300800]  [<ffffffffa007d5de>] nf_nat_ipv4_in+0x2e/0x84 [iptable_nat]
[ 1322.300800]  [<ffffffff8159d6f0>] ? inet_del_offload+0x40/0x40
[ 1322.300800]  [<ffffffff8159435a>] nf_iterate+0x9a/0xb0
[ 1322.300800]  [<ffffffff8159d6f0>] ? inet_del_offload+0x40/0x40
[ 1322.300800]  [<ffffffff8159440c>] nf_hook_slow+0x9c/0x160
[ 1322.300800]  [<ffffffff8159d6f0>] ? inet_del_offload+0x40/0x40
[ 1322.300800]  [<ffffffff8159e3a7>] ip_rcv+0x2f7/0x3d0
[ 1322.300800]  [<ffffffff815635a2>] __netif_receive_skb_core+0x4e2/0x880
[ 1322.300800]  [<ffffffff815631d2>] ? __netif_receive_skb_core+0x112/0x880
[ 1322.300800]  [<ffffffff81563958>] __netif_receive_skb+0x18/0x60
[ 1322.300800]  [<ffffffff8156460e>] process_backlog+0xbe/0x1a0
[ 1322.300800]  [<ffffffff81563e42>] net_rx_action+0x162/0x270
[ 1322.300800]  [<ffffffff8105dee4>] __do_softirq+0x104/0x2a0
[ 1322.300800]  [<ffffffff8105e45d>] irq_exit+0xcd/0xe0
[ 1322.300800]  [<ffffffff8167f586>] do_IRQ+0x56/0xc0
[ 1322.300800]  [<ffffffff81674f32>] common_interrupt+0x72/0x72
[ 1322.300800]  <EOI>
[ 1322.300800]  [<ffffffff8103d8e6>] ? native_safe_halt+0x6/0x10
[ 1322.300800]  [<ffffffff810a743d>] ? trace_hardirqs_on+0xd/0x10
[ 1322.300800]  [<ffffffff8100b284>] default_idle+0x24/0xc0
[ 1322.300800]  [<ffffffff8100bb4e>] arch_cpu_idle+0x2e/0x40
[ 1322.300800]  [<ffffffff810b9845>] cpu_startup_entry+0xc5/0x290
[ 1322.300800]  [<ffffffff8102fe6d>] start_secondary+0x21d/0x2d0
[ 1322.300800] Code: 00 48 01 d0 e9 b8 fe ff ff 48 81 8b b0 00 00 00
00 01 00 00 e9 9e fd ff ff 48 83 8b b0 00 00 00 20 e9 5f fd ff ff e8
ee f5 fe e0 <0f> 0b 4c 8b 8b b0 00 00 00 45 89 e0 48 89 d9 ba 81 01 00
00 48
[ 1322.300800] RIP  [<ffffffffa00692b2>] nf_nat_setup_info+0x392/0x400 [nf_nat]
[ 1322.300800]  RSP <ffff8800bfa83bb8>
[ 1322.383568] ---[ end trace c3cef76423498e85 ]---
[ 1322.384778] Kernel panic - not syncing: Fatal exception in interrupt
[ 1322.385766] Dumping ftrace buffer:
[ 1322.385766]    (ftrace buffer empty)
From f501d2e80c2811aa7d69110d72f24e6b7173d920 Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@xxxxxxxxxx>
Date: Mon, 6 Jan 2014 18:59:37 +0400
Subject: [PATCH] debug

Signed-off-by: Andrey Vagin <avagin@xxxxxxxxxx>
---
 net/core/dev.c                       | 2 ++
 net/netfilter/nf_conntrack_netlink.c | 1 +
 net/netfilter/nf_nat_core.c          | 8 ++++++++
 3 files changed, 11 insertions(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index 4fc1722..671e4b4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2981,7 +2981,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	struct rps_sock_flow_table *sock_flow_table;
 	int cpu = -1;
 	u16 tcpu;
+	static int xxx = 0;
 
+	return (xxx++) % 2;
 	if (skb_rx_queue_recorded(skb)) {
 		u16 index = skb_get_rx_queue(skb);
 		if (unlikely(index >= dev->real_num_rx_queues)) {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 08870b8..2f6c724 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1720,6 +1720,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 	if (tstamp)
 		tstamp->start = ktime_to_ns(ktime_get_real());
 
+	printk("%s:%d: %p\n", __func__, __LINE__, ct);
 	err = nf_conntrack_hash_check_insert(ct);
 	if (err < 0)
 		goto err2;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 63a8154..5600bfd 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -379,6 +379,14 @@ nf_nat_setup_info(struct nf_conn *ct,
 
 	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
 		     maniptype == NF_NAT_MANIP_DST);
+	if (net != &init_net)
+	{
+		long long i;
+		printk("%s:%d %p %d %lx\n", __func__, __LINE__, ct, maniptype, ct->status);
+		for (i = 0; i < 10000000000; i++)
+			asm("nop");
+		printk("%s:%d %p %d %lx\n", __func__, __LINE__, ct, maniptype, ct->status);
+	}
 	BUG_ON(nf_nat_initialized(ct, maniptype));
 
 	/* What we've got will look like inverse of reply. Normally
-- 
1.8.4.2

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>

#include <libnetfilter_conntrack/libnetfilter_conntrack.h>
#include <libnetfilter_conntrack/libnetfilter_conntrack_tcp.h>

int main(void)
{
	int ret;
	struct nfct_handle *h;
	struct nf_conntrack *ct;

	ct = nfct_new();
	if (!ct) {
		perror("nfct_new");
		return 0;
	}

	nfct_set_attr_u8(ct, ATTR_L3PROTO, AF_INET);
	nfct_set_attr_u32(ct, ATTR_IPV4_SRC, inet_addr("192.168.122.1"));
	nfct_set_attr_u32(ct, ATTR_IPV4_DST, inet_addr("192.168.122.56"));
	
	nfct_set_attr_u8(ct, ATTR_L4PROTO, IPPROTO_TCP);
	nfct_set_attr_u16(ct, ATTR_PORT_SRC, htons(6588));
	nfct_set_attr_u16(ct, ATTR_PORT_DST, htons(6588));

	nfct_setobjopt(ct, NFCT_SOPT_SETUP_REPLY);
	nfct_setobjopt(ct, NFCT_SOPT_UNDO_SNAT);

	nfct_set_attr_u8(ct, ATTR_TCP_STATE, TCP_CONNTRACK_SYN_SENT);
	nfct_set_attr_u32(ct, ATTR_TIMEOUT, 30);

	h = nfct_open(CONNTRACK, 0);
	if (!h) {
		perror("nfct_open");
		nfct_destroy(ct);
		return -1;
	}

	ret = nfct_query(h, NFCT_Q_CREATE, ct);

	printf("TEST: create conntrack ");
	if (ret == -1)
		printf("(%d)(%s)\n", ret, strerror(errno));
	else
		printf("(OK)\n");

	nfct_close(h);

	nfct_destroy(ct);

	ret == -1 ? exit(EXIT_FAILURE) : exit(EXIT_SUCCESS);
}
from scapy.all import *

a=IP(dst="192.168.122.56")/TCP(flags="S", sport=6588, dport=6588)
for i in xrange(4):
	send(a,  verbose=0)

[Index of Archives]     [Netfitler Users]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux