[RFC] nf_conntrack_dns: Workaround parallel DNS resolve

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Some versions of glibc perform parallel DNS lookups when IPv6 is enabled on the machine.
They send out one A and one AAAA query for a domain within a short timeframe.
However, both queries use the same source port, resulting in the same 4-tuple. As the conntrack is still NEW, the second packet is
dropped, resulting in a significant delay on the client side (5 seconds) due to retransmission.

Making major changes to the kernel for the sake of one protocol does not seem to be a good solution. See this reference:
http://www.spinics.net/lists/netfilter-devel/msg15860.html

What I tried with this workaround is to get the desired functionality (the AAAA query can pass) with as little change as possible.
So I added a conntrack helper for DNS that keeps track of A queries in union nf_conntrack_proto. If an AAAA query matches, it is
associated with the first conntrack and the source port is mangled back from 1024 to its original value.

Built and tested on 3.12.30. Honestly, I don't think this should go into mainline, but it may be interesting for other people.
    
Signed-off-by: Sebastian Poehn <sebastian.poehn@xxxxxxxxxxxxxx>

diff --git a/include/linux/netfilter/nf_conntrack_dns.h b/include/linux/netfilter/nf_conntrack_dns.h
new file mode 100644
index 0000000..5c709d2
--- /dev/null
+++ b/include/linux/netfilter/nf_conntrack_dns.h
@@ -0,0 +1,25 @@
+#ifndef _SOP_NF_CONNTRACK_DNS_H
+#define _SOP_NF_CONNTRACK_DNS_H
+
+#define DNS_PORT 53	/* port used when the "ports" module parameter is empty */
+#define	DNS_RECORD_TYPE				2	/* bytes of a question's type field */
+#define	DNS_RECORD_CLASS			2	/* bytes of a question's class field */
+#define	DNS_RECORD_TYPE_AND_CLASS		(DNS_RECORD_TYPE + DNS_RECORD_CLASS)
+#define	DNS_RECORD_MIN				(sizeof("A") + DNS_RECORD_TYPE_AND_CLASS)	/* NB: a size_t */
+
+struct nf_ct_dns {	/* overlaid on ct->proto by the DNS helper */
+	u8 usage;	/* AAAA queries already accepted against this conntrack */
+	char query[0];	/* question name of the A query, copied as found in the packet */
+};
+
+struct dnshdr {	/* fixed part of a DNS message; all counters in network byte order */
+	__be16 query_id;
+	__be16 flags;	/* most significant bit set: message is a response */
+	__be16 question_count;
+	__be16 answer_count;
+	__be16 authority_count;
+	__be16 additional_record_count;
+	char query[0];	/* first question starts directly after the header */
+};
+
+#endif /* _SOP_NF_CONNTRACK_DNS_H */
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index a776541..afeba0a 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -18,6 +18,7 @@
 #include <linux/compiler.h>
 #include <linux/atomic.h>
 
+#include <linux/netfilter/nf_conntrack_dns.h>
 #include <linux/netfilter/nf_conntrack_tcp.h>
 #include <linux/netfilter/nf_conntrack_dccp.h>
 #include <linux/netfilter/nf_conntrack_sctp.h>
@@ -33,6 +34,8 @@ union nf_conntrack_proto {
 	struct ip_ct_sctp sctp;
 	struct ip_ct_tcp tcp;
 	struct nf_ct_gre gre;
+//FIXME: Has to be changed! Will do in the very end as it break my build setup
+//	struct nf_ct_dns dns;
 };
 
 union nf_conntrack_expect_proto {
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 91077a6..e6fe611 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -320,6 +320,16 @@ config NF_CONNTRACK_TFTP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_CONNTRACK_DNS
+	tristate "DNS protocol support"
+	depends on NETFILTER_ADVANCED
+	help
+	  This is a workaround for dns resolvers sending out A and AAAA requests
+	  in a short timeframe. This will rewrite source port of the second request
+	  so we do not drop the packet due to NEW conntrack.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NF_CT_NETLINK
 	tristate 'Connection tracking netlink interface'
 	select NETFILTER_NETLINK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 4002bb5..cc6edb9 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -44,6 +44,8 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
 obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
 obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
 
+obj-$(CONFIG_NF_CONNTRACK_DNS) += nf_conntrack_dns.o
+
 nf_nat-y	:= nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
 		   nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
 
diff --git a/net/netfilter/nf_conntrack_dns.c b/net/netfilter/nf_conntrack_dns.c
new file mode 100644
index 0000000..3299e62
--- /dev/null
+++ b/net/netfilter/nf_conntrack_dns.c
@@ -0,0 +1,268 @@
+/* (C) 2001-2002 Magnus Boden <mb@xxxxxxxxxxxxx>
+ * (C) 2006-2012 Patrick McHardy <kaber@xxxxxxxxx>
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_log.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_dns.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+
+MODULE_AUTHOR("Sebastian Poehn <sebastian.poehn@xxxxxxxxxxxxxx>");
+MODULE_DESCRIPTION("DNS connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_conntrack_dns");
+MODULE_ALIAS_NFCT_HELPER("dns");
+
+#define	MAX_PACKETS			1	/* AAAA queries accepted per stored A query */
+#define MAX_PORTS			8	/* capacity of the ports[] module parameter */
+#define MAX_QUERY_LEN		(sizeof(union nf_conntrack_proto) - 1)	/* minus the usage byte */
+#define MIN(a, b)			(((a) < (b)) ? (a) : (b))	/* evaluates its arguments twice */
+
+static unsigned short ports[MAX_PORTS];	/* UDP ports the helper is registered for */
+static unsigned int ports_c;	/* number of valid entries in ports[] */
+module_param_array(ports, ushort, &ports_c, 0400);
+MODULE_PARM_DESC(ports, "Port numbers of DNS servers");
+
+enum dns_query_type {	/* question types dns_help() acts on, in host byte order */
+	QUERY_A = 1, QUERY_AAAA = 0x1C,	/* 0x1C == 28 */
+};
+
+/* Look @tuple up in the conntrack table, using the namespace and zone of @ct.
+ * Returns the conntrack owning the matching entry, or NULL if there is none.
+ * The hit comes from nf_conntrack_find_get(), i.e. with a reference held that
+ * the caller has to release again (nf_ct_put()). */
+struct nf_conn *search_ct_for_me(struct nf_conntrack_tuple *tuple,
+		struct nf_conn *ct)
+{
+	struct nf_conntrack_tuple_hash *hit;
+
+	hit = nf_conntrack_find_get(nf_ct_net(ct), nf_ct_zone(ct), tuple);
+	if (!hit)
+		return NULL;
+	return nf_ct_tuplehash_to_ctrack(hit);
+}
+
+/* Tell DNS responses from queries.
+ * Returns 1 when the most significant bit of the host-order flags word (the
+ * QR bit) is set, 0 otherwise. */
+int is_response(const struct dnshdr *dnsh_)
+{
+	return (ntohs(dnsh_->flags) & 0x8000) ? 1 : 0;
+}
+
+/* By the time the helper runs, the source port of the second query may have
+ * been rewritten (the author observed 1024). Put back the port recorded in
+ * @ct's original-direction tuple. @uh is only read: it may point at a copy
+ * made by skb_header_pointer(), so the store goes through skb->data. */
+void fixup_udp_sport(struct sk_buff *skb, struct nf_conn *ct,
+		enum ip_conntrack_info ctinfo, unsigned int protoff,
+		struct udphdr *uh)
+{
+	__be16 port_wanted =
+			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
+
+	if (uh->source == port_wanted)
+		return;
+
+	/* Packet data must be private and linear before it is modified */
+	if (!skb_make_writable(skb, skb->len))
+		goto fail;
+	((struct udphdr *) (skb->data + protoff))->source = port_wanted;
+	if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, protoff, 0, 0, NULL, 0))
+		goto fail;
+	pr_debug("Changed port to %d\n", ntohs(port_wanted));
+	return;
+fail:
+	nf_ct_helper_log(skb, ct, "Recalculation of UDP header fields failed\n");
+}
+
+/* Make @ct the conntrack of @skb. The reference on @ct is taken before the
+ * skb's previous conntrack is released, so re-attaching the conntrack that is
+ * already attached cannot drop its last reference in between.
+ */
+void attach_ct_to_skb(struct sk_buff *skb, struct nf_conn *ct)
+{
+	nf_conntrack_get(&ct->ct_general);
+	nf_conntrack_put(skb->nfct);
+	skb->nfct = &ct->ct_general;
+}
+
+/* Conntrack helper callback: remember the name of a new A query in ct->proto
+ * and, when a new AAAA query for the same name finds that conntrack by tuple,
+ * move the packet over to it. Always returns NF_ACCEPT. */
+static int dns_help(struct sk_buff *skb, unsigned int protoff,
+		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+	u8 buffer[sizeof(struct udphdr)
+			  + sizeof(struct dnshdr)
+			  + MAX_QUERY_LEN];
+	const int hdr_len = sizeof(struct udphdr) + sizeof(struct dnshdr);
+	struct nf_conntrack_tuple *tuple =
+			&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	struct nf_ct_dns *store;
+	struct nf_conn *stored;
+	struct udphdr *uh;
+	struct dnshdr *dnsh;
+	char *query;
+	__be16 raw_type;
+	u16 type;
+	int packet_len = skb->len - protoff;
+	int query_len = packet_len - hdr_len;
+	int fetch_len, avail, string_len;
+
+	/* Only handle new connections */
+	if (IP_CT_NEW != ctinfo)
+		return NF_ACCEPT;
+
+	/* Basic length validation. Compare as int: against the size_t value of
+	 * DNS_RECORD_MIN a negative query_len would become a huge unsigned. */
+	if (packet_len <= 0 || query_len < (int) DNS_RECORD_MIN) {
+		nf_ct_helper_log(skb, ct,
+				"DNS packet of insuffient length: %d\n", packet_len);
+		return NF_ACCEPT;
+	}
+
+	/* Get UDP header, DNS header and as much of the question as fits */
+	fetch_len = MIN(packet_len, (int) sizeof(buffer));
+	uh = skb_header_pointer(skb, protoff, fetch_len, buffer);
+	if (NULL == uh) {
+		nf_ct_helper_log(skb, ct,
+				"Cannot get sufficient length skb part of %d: %p",
+				fetch_len, skb);
+		return NF_ACCEPT;
+	}
+	dnsh = (struct dnshdr *) (uh + 1);
+	query = dnsh->query;
+
+	/* Get first record. The name, its terminating zero byte and the
+	 * type/class fields must all lie inside the fetched bytes; this also
+	 * guarantees that query is NUL-terminated for the code below. */
+	avail = fetch_len - hdr_len;
+	string_len = strnlen(query, avail);
+	if ((avail - string_len) < 1 + DNS_RECORD_TYPE_AND_CLASS) {
+		nf_ct_helper_log(skb, ct,
+				"Inappropriately formated record: Only %d left for type and class\n",
+				avail - string_len);
+		return NF_ACCEPT;
+	}
+	/* The type field has no alignment guarantee: copy it out */
+	memcpy(&raw_type, query + string_len + 1, sizeof(raw_type));
+	type = ntohs(raw_type);
+
+	/* Only work on Query */
+	if (is_response(dnsh)) {
+		pr_debug("DNS RESPONSE for %s\n", query);
+		return NF_ACCEPT;
+	}
+	pr_debug("DNS QUERY for %s type %s\n", query,
+			(type == QUERY_A) ? "A" : "AAAA");
+	if (ntohs(dnsh->question_count) < 1)
+		return NF_ACCEPT;
+
+	switch (type) {
+	case QUERY_A:
+		/* Store query in opaque storage of ct; the length check above
+		 * ensures the copy ends up NUL-terminated */
+		store = (struct nf_ct_dns *) &ct->proto;
+		strncpy(store->query, query, MAX_QUERY_LEN);
+		store->usage = 0;
+		break;
+	case QUERY_AAAA:
+		/* search_ct_for_me() hands back a referenced conntrack */
+		stored = search_ct_for_me(tuple, ct);
+		if (NULL == stored)
+			return NF_ACCEPT;
+		store = (struct nf_ct_dns *) &stored->proto;
+
+		/* Only allow MAX_PACKETS for one connection */
+		if (store->usage >= MAX_PACKETS) {
+			nf_ct_put(stored);
+			return NF_ACCEPT;
+		}
+		store->usage++;
+
+		/* Compare including the terminator so that a stored name that
+		 * merely starts with this query does not match */
+		if (0 == strncmp(store->query, query, string_len + 1)) {
+			/* ct can go away once the skb lets go of it */
+			fixup_udp_sport(skb, ct, ctinfo, protoff, uh);
+			attach_ct_to_skb(skb, stored);
+		}
+		nf_ct_put(stored);	/* drop the lookup reference */
+		break;
+	default:	/* do nothing and NF_ACCEPT for all other query types */
+		break;
+	}
+	return NF_ACCEPT;
+}
+
+static struct nf_conntrack_helper dnsp[MAX_PORTS][2] __read_mostly;	/* [port][0 = AF_INET, 1 = AF_INET6] */
+
+static const struct nf_conntrack_expect_policy dns_exp_policy = {	/* shared by every dnsp[][] entry */
+		.max_expected = 1, .timeout = 5 * 60, };
+
+/* Unregister the IPv4 and IPv6 helper of every configured port. */
+static void nf_conntrack_dns_fini(void)
+{
+	unsigned int k;
+
+	/* walk dnsp[port][family] row by row: k / 2 is the port, k % 2 the family */
+	for (k = 0; k < 2 * ports_c; k++)
+		nf_conntrack_helper_unregister(&dnsp[k / 2][k % 2]);
+}
+
+/* Register an IPv4 and an IPv6 helper for every port in ports[] (default 53). */
+static int __init nf_conntrack_dns_init(void)
+{
+	int i, j, ret;
+
+	if (ports_c == 0)
+		ports[ports_c++] = DNS_PORT;
+	for (i = 0; i < ports_c; i++) {
+		memset(&dnsp[i], 0, sizeof(dnsp[i]));
+		dnsp[i][0].tuple.src.l3num = AF_INET;
+		dnsp[i][1].tuple.src.l3num = AF_INET6;
+		for (j = 0; j < 2; j++) {
+			dnsp[i][j].tuple.dst.protonum = IPPROTO_UDP;
+			dnsp[i][j].tuple.src.u.udp.port = htons(ports[i]);
+			dnsp[i][j].expect_policy = &dns_exp_policy;
+			dnsp[i][j].me = THIS_MODULE;
+			dnsp[i][j].help = dns_help;
+			if (ports[i] == DNS_PORT)
+				sprintf(dnsp[i][j].name, "dns");
+			else
+				sprintf(dnsp[i][j].name, "dns-%u", i);
+			ret = nf_conntrack_helper_register(&dnsp[i][j]);
+			if (ret) {
+				pr_err("nf_ct_dns: failed to register helper for pf: %u port: %u\n",
+					dnsp[i][j].tuple.src.l3num, ports[i]);
+				goto unwind;
+			}
+		}
+	}
+	return 0;
+unwind:	/* unregister only the 2 * i + j helpers that were registered */
+	for (j += 2 * i; j-- > 0; )
+		nf_conntrack_helper_unregister(&dnsp[j / 2][j % 2]);
+	return ret;
+}
+
+module_init(nf_conntrack_dns_init);
+module_exit(nf_conntrack_dns_fini);

--
To unsubscribe from this list: send the line "unsubscribe netfilter" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Netfilter Development]     [Linux Kernel Networking Development]     [Netem]     [Berkeley Packet Filter]     [Linux Kernel Development]     [Advanced Routing & Traffic Control]     [Bugtraq]

  Powered by Linux