[ULOGD 1/4] rework NFCT to use a generic hashtable [was Re: [ULOGD 1/4] improve netlink overrun handling of NFCT]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Sorry, wrong description + patch. Let's start again.

[PATCH] rework NFCT to use a generic hashtable

This patch introduces a generic hashtable to store the nf_conntrack
objects. The objects are identified by the original and reply tuples
instead of the conntrack ID which is not dumped in the event message of
linux kernel < 2.6.25. This patch also fixes the NFCT_MSG_* by NFCT_T_*
which is the appropriate
message type tag.

Signed-off-by: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx>

-- 
"Los honestos son inadaptados sociales" -- Les Luthiers
[PATCH] rework NFCT to use a generic hashtable

This patch introduces a generic hashtable to store the nf_conntrack objects.
The objects are identified by the original and reply tuples instead of the
conntrack ID which is not dumped in the event message of linux kernel < 2.6.25.
This patch also fixes the NFCT_MSG_* by NFCT_T_* which is the appropriate
message type tag.

Signed-off-by: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx>

Index: ulogd2/input/flow/ulogd_inpflow_NFCT.c
===================================================================
--- ulogd2.orig/input/flow/ulogd_inpflow_NFCT.c	2008-05-14 13:46:34.000000000 +0200
+++ ulogd2/input/flow/ulogd_inpflow_NFCT.c	2008-05-14 13:48:10.000000000 +0200
@@ -13,12 +13,13 @@
  *      Added timestamp accounting support of the conntrack entries,
  *      reworked by Harald Welte.
  *
+ * 11 May 2008, Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx>
+ * 	Use a generic hashtable to store the existing flows
+ *
  * TODO:
  * 	- add nanosecond-accurate packet receive timestamp of event-changing
  * 	  packets to {ip,nf}_conntrack_netlink, so we can have accurate IPFIX
  *	  flowStart / flowEnd NanoSeconds.
- *	- if using preallocated data structure, get rid of all list heads and
- *	  use per-bucket arrays instead.
  *	- SIGHUP for reconfiguration without loosing hash table contents, but
  *	  re-read of config and reallocation / rehashing of table, if required
  *	- Split hashtable code into separate [filter] plugin, so we can run 
@@ -34,6 +35,8 @@
 #include <sys/time.h>
 #include <time.h>
 #include <ulogd/linuxlist.h>
+#include <ulogd/jhash.h>
+#include <ulogd/hash.h>
 
 #include <ulogd/ulogd.h>
 #include <ulogd/timer.h>
@@ -44,24 +47,15 @@
 typedef enum TIMES_ { START, STOP, __TIME_MAX } TIMES;
  
 struct ct_timestamp {
-	struct llist_head list;
 	struct timeval time[__TIME_MAX];
-	int id;
-};
-
-struct ct_htable {
-	struct llist_head *buckets;
-	int num_buckets;
-	int prealloc;
-	struct llist_head idle;
-	struct ct_timestamp *ts;
+	struct nf_conntrack *ct;
 };
 
 struct nfct_pluginstance {
 	struct nfct_handle *cth;
 	struct ulogd_fd nfct_fd;
 	struct ulogd_timer timer;
-	struct ct_htable *ct_active;
+	struct hashtable *ct_active;
 };
 
 #define HTABLE_SIZE	(8192)
@@ -69,7 +63,7 @@ struct nfct_pluginstance {
 #define EVENT_MASK	NF_NETLINK_CONNTRACK_NEW | NF_NETLINK_CONNTRACK_DESTROY
 
 static struct config_keyset nfct_kset = {
-	.num_ces = 6,
+	.num_ces = 5,
 	.ces = {
 		{
 			.key	 = "pollinterval",
@@ -84,12 +78,6 @@ static struct config_keyset nfct_kset = 
 			.u.value = 1,
 		},
 		{
-			.key	 = "hash_prealloc",
-			.type	 = CONFIG_TYPE_INT,
-			.options = CONFIG_OPT_NONE,
-			.u.value = 1,
-		},
-		{
 			.key	 = "hash_buckets",
 			.type	 = CONFIG_TYPE_INT,
 			.options = CONFIG_OPT_NONE,
@@ -112,10 +100,9 @@ static struct config_keyset nfct_kset = 
 };
 #define pollint_ce(x)	(x->ces[0])
 #define usehash_ce(x)	(x->ces[1])
-#define prealloc_ce(x)	(x->ces[2])
-#define buckets_ce(x)	(x->ces[3])
-#define maxentries_ce(x) (x->ces[4])
-#define eventmask_ce(x) (x->ces[5])
+#define buckets_ce(x)	(x->ces[2])
+#define maxentries_ce(x) (x->ces[3])
+#define eventmask_ce(x) (x->ces[4])
 
 enum nfct_keys {
 	NFCT_ORIG_IP_SADDR = 0,
@@ -366,117 +353,68 @@ static struct ulogd_key nfct_okeys[] = {
 	},
 };
 
-static struct ct_htable *htable_alloc(int htable_size, int prealloc)
+static uint32_t __hash4(const struct nf_conntrack *ct, struct hashtable *table)
 {
-	struct ct_htable *htable;
-	struct ct_timestamp *ct;
-	int i;
-
-	htable = malloc(sizeof(*htable)
-			+ sizeof(struct llist_head)*htable_size);
-	if (!htable)
-		return NULL;
-
-	htable->buckets = (void *)htable + sizeof(*htable);
-	htable->num_buckets = htable_size;
-	htable->prealloc = prealloc;
-	INIT_LLIST_HEAD(&htable->idle);
-
-	for (i = 0; i < htable->num_buckets; i++)
-		INIT_LLIST_HEAD(&htable->buckets[i]);
-	
-	if (!htable->prealloc)
-		return htable;
-
-	ct = malloc(sizeof(struct ct_timestamp)
-		    * htable->num_buckets * htable->prealloc);
-	if (!ct) {
-		free(htable);
-		return NULL;
-	}
+	unsigned int a, b;
 
-	/* save the pointer for later free()ing */
-	htable->ts = ct;
+	a = jhash(nfct_get_attr(ct, ATTR_ORIG_IPV4_SRC), sizeof(uint32_t),
+		  ((nfct_get_attr_u8(ct, ATTR_ORIG_L3PROTO) << 16) |
+		   (nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO))));
 
-	for (i = 0; i < htable->num_buckets * htable->prealloc; i++)
-		llist_add(&ct[i].list, &htable->idle);
+	b = jhash(nfct_get_attr(ct, ATTR_ORIG_IPV4_DST), sizeof(uint32_t),
+		  ((nfct_get_attr_u16(ct, ATTR_ORIG_PORT_SRC) << 16) |
+		   (nfct_get_attr_u16(ct, ATTR_ORIG_PORT_DST))));
 
-	return htable;
+	/*
+	 * Instead of returning hash % table->hashsize (implying a divide)
+	 * we return the high 32 bits of the (hash * table->hashsize) that will
+	 * give results between [0 and hashsize-1] and same hash distribution,
+	 * but using a multiply, less expensive than a divide. See:
+	 * http://www.mail-archive.com/netdev@xxxxxxxxxxxxxxx/msg56623.html
+	 */
+	return ((uint64_t)jhash_2words(a, b, 0) * table->hashsize) >> 32;
 }
 
-static void htable_free(struct ct_htable *htable)
+static uint32_t __hash6(const struct nf_conntrack *ct, struct hashtable *table)
 {
-	struct llist_head *ptr, *ptr2;
-	int i;
+	unsigned int a, b;
 
-	if (htable->prealloc) {
-		/* the easy case */
-		free(htable->ts);
-		free(htable);
+	a = jhash(nfct_get_attr(ct, ATTR_ORIG_IPV6_SRC), sizeof(uint32_t)*4,
+		  ((nfct_get_attr_u8(ct, ATTR_ORIG_L3PROTO) << 16) |
+		   (nfct_get_attr_u8(ct, ATTR_ORIG_L4PROTO))));
 
-		return;
-	}
-
-	/* non-prealloc case */
-
-	for (i = 0; i < htable->num_buckets; i++) {
-		llist_for_each_safe(ptr, ptr2, &htable->buckets[i])
-			free(container_of(ptr, struct ct_timestamp, list));
-	}
+	b = jhash(nfct_get_attr(ct, ATTR_ORIG_IPV6_DST), sizeof(uint32_t)*4,
+		  ((nfct_get_attr_u16(ct, ATTR_ORIG_PORT_SRC) << 16) |
+		   (nfct_get_attr_u16(ct, ATTR_ORIG_PORT_DST))));
 
-	/* don't need to check for 'idle' list, since it is only used in
-	 * the preallocated case */
+	return ((uint64_t)jhash_2words(a, b, 0) * table->hashsize) >> 32;
 }
 
-static int ct_hash_add(struct ct_htable *htable, unsigned int id)
+static uint32_t hash(const void *data, struct hashtable *table)
 {
-	struct ct_timestamp *ct;
-
-	if (htable->prealloc) {
-		if (llist_empty(&htable->idle)) {
-			ulogd_log(ULOGD_ERROR, "Not enough ct_timestamp entries\n");
-			return -1;
-		}
-
-		ct = container_of(htable->idle.next, struct ct_timestamp, list);
-
-		ct->id = id;
-		gettimeofday(&ct->time[START], NULL);
-
-		llist_move(&ct->list, &htable->buckets[id % htable->num_buckets]);
-	} else {
-		ct = malloc(sizeof *ct);
-		if (!ct) {
-			ulogd_log(ULOGD_ERROR, "Not enough memory\n");
-			return -1;
-		}
-
-		ct->id = id;
-		gettimeofday(&ct->time[START], NULL);
+	int ret = 0;
+	const struct ct_timestamp *ts = data;
 
-		llist_add(&ct->list, &htable->buckets[id % htable->num_buckets]);
+	switch(nfct_get_attr_u8(ts->ct, ATTR_L3PROTO)) {
+		case AF_INET:
+			ret = __hash4(ts->ct, table);
+			break;
+		case AF_INET6:
+			ret = __hash6(ts->ct, table);
+			break;
+		default:
+			break;
 	}
 
-	return 0;
+	return ret;
 }
 
-static struct ct_timestamp *ct_hash_get(struct ct_htable *htable, uint32_t id)
+static int compare(const void *data1, const void *data2)
 {
-	struct ct_timestamp *ct = NULL;
-	struct llist_head *ptr;
+	const struct ct_timestamp *u1 = data1;
+	const struct ct_timestamp *u2 = data2;
 
-	llist_for_each(ptr, &htable->buckets[id % htable->num_buckets]) {
-		ct = container_of(ptr, struct ct_timestamp, list);
-		if (ct->id == id) {
-			gettimeofday(&ct->time[STOP], NULL);
-			if (htable->prealloc)
-				llist_move(&ct->list, &htable->idle);
-			else
-				free(ct);
-			break;
-		}
-	}
-	return ct;
+	return nfct_cmp(u1->ct, u2->ct, NFCT_CMP_ORIG | NFCT_CMP_REPL);
 }
 
 static int propagate_ct(struct ulogd_pluginstance *upi,
@@ -600,28 +538,57 @@ static int event_handler(enum nf_conntra
 	struct nfct_pluginstance *cpi = 
 				(struct nfct_pluginstance *) upi->private;
 	struct ct_timestamp *ts = NULL;
+	struct ct_timestamp tmp = {
+		.ct = ct,
+	};
 	struct ulogd_pluginstance *npi = NULL;
 	int ret = 0;
 
-	if (type == NFCT_MSG_NEW) {
-		if (usehash_ce(upi->config_kset).u.value != 0) {
-			ct_hash_add(cpi->ct_active, nfct_get_attr_u32(ct, ATTR_ID));
-			return 0;
+	if (usehash_ce(upi->config_kset).u.value == 0)
+		return NFCT_CB_CONTINUE;
+
+	switch(type) {
+	case NFCT_T_NEW:
+		ts = hashtable_add(cpi->ct_active, &tmp);
+		gettimeofday(&ts->time[START], NULL);
+		return NFCT_CB_STOLEN;
+	case NFCT_T_UPDATE:
+		ts = hashtable_get(cpi->ct_active, &tmp);
+		if (ts)
+			nfct_copy(ts->ct, ct, NFCT_CP_META);
+		else {
+			ts = hashtable_add(cpi->ct_active, &tmp);
+			gettimeofday(&ts->time[START], NULL);
+			return NFCT_CB_STOLEN;
 		}
-	} else if (type == NFCT_MSG_DESTROY) {
-		if (usehash_ce(upi->config_kset).u.value != 0)
-			ts = ct_hash_get(cpi->ct_active, nfct_get_attr_u32(ct, ATTR_ID));
-	}
+		break;
+	case NFCT_T_DESTROY:
+		ts = hashtable_get(cpi->ct_active, &tmp);
+		if (ts)
+			gettimeofday(&ts->time[STOP], NULL);
+
+		/* since we support the re-use of one instance in
+		 * several different stacks, we duplicate the message
+		 * to let them know */
+		llist_for_each_entry(npi, &upi->plist, plist) {
+			ret = propagate_ct(npi, ct, type, ts);
+			if (ret != 0)
+				break;
+		}
+
+		propagate_ct(upi, ct, type, ts);
 
-	/* since we support the re-use of one instance in
-	 * several different stacks, we duplicate the message
-	 * to let them know */
-	llist_for_each_entry(npi, &upi->plist, plist) {
-		ret = propagate_ct(npi, ct, type, ts);
-		if (ret != 0)
-			return ret;
+		if (ts) {
+			hashtable_del(cpi->ct_active, ts);
+			free(ts->ct);
+		}
+		break;
+	default:
+		ulogd_log(ULOGD_NOTICE, "unknown netlink message type\n");
+		break;
 	}
-	return propagate_ct(upi, ct, type, ts);
+
+	return NFCT_CB_CONTINUE;
 }
 
 static int read_cb_nfct(int fd, unsigned int what, void *param)
@@ -677,7 +644,6 @@ static int constructor_nfct(struct ulogd
 {
 	struct nfct_pluginstance *cpi = 
 			(struct nfct_pluginstance *)upi->private;
-	int prealloc;
 
 	cpi->cth = nfct_open(NFNL_SUBSYS_CTNETLINK,
 			     eventmask_ce(upi->config_kset).u.value);
@@ -695,15 +661,13 @@ static int constructor_nfct(struct ulogd
 
 	ulogd_register_fd(&cpi->nfct_fd);
 
-	if (prealloc_ce(upi->config_kset).u.value != 0)
-		prealloc = maxentries_ce(upi->config_kset).u.value / 
-				buckets_ce(upi->config_kset).u.value;
-	else
-		prealloc = 0;
-
 	if (usehash_ce(upi->config_kset).u.value != 0) {
-		cpi->ct_active = htable_alloc(buckets_ce(upi->config_kset).u.value,
-					      prealloc);
+		cpi->ct_active =
+		     hashtable_create(buckets_ce(upi->config_kset).u.value,
+		     		      maxentries_ce(upi->config_kset).u.value,
+				      sizeof(struct ct_timestamp),
+				      hash,
+				      compare);
 		if (!cpi->ct_active) {
 			ulogd_log(ULOGD_FATAL, "error allocating hash\n");
 			nfct_close(cpi->cth);
@@ -719,7 +683,7 @@ static int destructor_nfct(struct ulogd_
 	struct nfct_pluginstance *cpi = (void *) pi;
 	int rc;
 	
-	htable_free(cpi->ct_active);
+	hashtable_destroy(cpi->ct_active);
 
 	rc = nfct_close(cpi->cth);
 	if (rc < 0)
Index: ulogd2/src/hash.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ ulogd2/src/hash.c	2008-05-14 13:46:52.000000000 +0200
@@ -0,0 +1,193 @@
+/*
+ * (C) 2006-2007 by Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Description: generic hash table implementation
+ */
+
+#include <ulogd/hash.h>
+#include <ulogd/slist.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct hashtable_node *hashtable_alloc_node(int datasize, void *data)
+{
+	struct hashtable_node *n;
+	int size = sizeof(struct hashtable_node) + datasize;
+
+	n = malloc(size);
+	if (!n)
+		return NULL;
+	memset(n, 0, size);
+	memcpy(n->data, data, datasize);
+
+	return n;
+}
+
+void hashtable_destroy_node(struct hashtable_node *h)
+{
+	free(h);
+}
+
+struct hashtable *
+hashtable_create(int hashsize, int limit, int datasize,
+		 uint32_t (*hash)(const void *data, struct hashtable *table),
+		 int (*compare)(const void *data1, const void *data2))
+{
+	int i;
+	struct hashtable *h;
+	int size = sizeof(struct hashtable)
+		   + hashsize * sizeof(struct slist_head);
+
+	h = (struct hashtable *) malloc(size);
+	if (!h) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	memset(h, 0, size);
+	for (i=0; i<hashsize; i++)
+		INIT_SLIST_HEAD(h->members[i]);
+
+	h->hashsize = hashsize;
+	h->limit = limit;
+	h->datasize = datasize;
+	h->hash = hash;
+	h->compare = compare;
+
+	return h;
+}
+
+void hashtable_destroy(struct hashtable *h)
+{
+	hashtable_flush(h);
+	free(h);
+}
+
+void *hashtable_add(struct hashtable *table, void *data)
+{
+	uint32_t id;
+	struct slist_head *e;
+	struct hashtable_node *n;
+
+	/* hash table is full */
+	if (table->count >= table->limit) {
+		errno = ENOSPC;
+		return 0;
+	}
+
+	id = table->hash(data, table);
+
+	slist_for_each(e, &table->members[id]) {
+		n = slist_entry(e, struct hashtable_node, head);
+		if (table->compare(n->data, data)) {
+			errno = EEXIST;
+			return 0;
+		}
+	}
+
+	n = hashtable_alloc_node(table->datasize, data);
+	if (n == NULL) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	slist_add(&table->members[id], &n->head);
+	table->count++;
+
+	return n->data;
+}
+
+void *hashtable_get(struct hashtable *table, const void *data)
+{
+	struct slist_head *e;
+	uint32_t id;
+	struct hashtable_node *n;
+
+	id = table->hash(data, table);
+
+	slist_for_each(e, &table->members[id]) {
+		n = slist_entry(e, struct hashtable_node, head);
+		if (table->compare(n->data, data))
+			return n->data;
+	}
+
+	errno = ENOENT;
+	return NULL;
+}
+
+int hashtable_del(struct hashtable *table, void *data)
+{
+	struct slist_head *e, *next, *prev;
+	uint32_t id;
+	struct hashtable_node *n;
+
+	id = table->hash(data, table);
+
+	slist_for_each_safe(e, prev, next, &table->members[id]) {
+		n = slist_entry(e, struct hashtable_node, head);
+		if (table->compare(n->data, data)) {
+			slist_del(e, prev);
+			hashtable_destroy_node(n);
+			table->count--;
+			return 0;
+		}
+	}
+	errno = ENOENT;
+	return -1;
+}
+
+int hashtable_flush(struct hashtable *table)
+{
+	uint32_t i;
+	struct slist_head *e, *next, *prev;
+	struct hashtable_node *n;
+
+	for (i=0; i < table->hashsize; i++)
+		slist_for_each_safe(e, prev, next, &table->members[i]) {
+			n = slist_entry(e, struct hashtable_node, head);
+			slist_del(e, prev);
+			hashtable_destroy_node(n);
+		}
+
+	table->count = 0;
+
+	return 0;
+}
+
+int hashtable_iterate(struct hashtable *table, void *data,
+		      int (*iterate)(void *data1, void *data2))
+{
+	uint32_t i;
+	struct slist_head *e, *next, *prev;
+	struct hashtable_node *n;
+
+	for (i=0; i < table->hashsize; i++) {
+		slist_for_each_safe(e, prev, next, &table->members[i]) {
+			n = slist_entry(e, struct hashtable_node, head);
+			if (iterate(data, n->data) == -1)
+				return -1;
+		}
+	}
+	return 0;
+}
+
+unsigned int hashtable_counter(const struct hashtable *table)
+{
+	return table->count;
+}
Index: ulogd2/include/ulogd/hash.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ ulogd2/include/ulogd/hash.h	2008-05-14 13:46:52.000000000 +0200
@@ -0,0 +1,48 @@
+#ifndef _NF_SET_HASH_H_
+#define _NF_SET_HASH_H_
+
+#include <unistd.h>
+#include "slist.h"
+#include <ulogd/linuxlist.h>
+
+#include <stdint.h>
+
+struct hashtable;
+struct hashtable_node;
+
+struct hashtable {
+	uint32_t hashsize;
+	uint32_t limit;
+	uint32_t count;
+	uint32_t initval;
+	uint32_t datasize;
+
+	uint32_t	(*hash)(const void *data, struct hashtable *table);
+	int		(*compare)(const void *data1, const void *data2);
+
+	struct slist_head 	members[0];
+};
+
+struct hashtable_node {
+	struct slist_head head;
+	char data[0];
+};
+
+struct hashtable_node *hashtable_alloc_node(int datasize, void *data);
+void hashtable_destroy_node(struct hashtable_node *h);
+
+struct hashtable *
+hashtable_create(int hashsize, int limit, int datasize,
+		 uint32_t (*hash)(const void *data, struct hashtable *table),
+		 int (*compare)(const void *data1, const void *data2));
+void hashtable_destroy(struct hashtable *h);
+
+void *hashtable_add(struct hashtable *table, void *data);
+void *hashtable_get(struct hashtable *table, const void *data);
+int hashtable_del(struct hashtable *table, void *data);
+int hashtable_flush(struct hashtable *table);
+int hashtable_iterate(struct hashtable *table, void *data,
+		      int (*iterate)(void *data1, void *data2));
+unsigned int hashtable_counter(const struct hashtable *table);
+
+#endif

[Index of Archives]     [Netfitler Users]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux